commit c702d6babc07ca1d43523fdd4191836c31ecfd18
Author: NullOnRise <112127708+NullGang@users.noreply.github.com>
Date: Tue Oct 1 22:35:06 2024 +0200
upd
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..76edc49
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,272 @@
+# Remove the line below if you want to inherit .editorconfig settings from higher directories
+root = true
+
+[*]
+
+#### Core EditorConfig Options ####
+
+# Set default charset
+charset = utf-8
+
+# Indentation and spacing
+indent_size = 4
+indent_style = space
+tab_width = 4
+
+# New line preferences
+end_of_line = lf
+insert_final_newline = true
+
+# Markdown, JSON, YAML, props and csproj files
+[*.{md,json,yml,props,csproj}]
+
+# Indentation and spacing
+indent_size = 2
+tab_width = 2
+
+# C# files
+[*.cs]
+
+#### .NET Coding Conventions ####
+
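+# Most style options below use the form '<value>:<severity>', where the optional
+# severity suffix is one of: silent, suggestion, warning, or error.
+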
+# Organize usings
+dotnet_separate_import_directive_groups = false
+dotnet_sort_system_directives_first = false
+
+# this. and Me. preferences
+dotnet_style_qualification_for_event = false:silent
+dotnet_style_qualification_for_field = false:silent
+dotnet_style_qualification_for_method = false:silent
+dotnet_style_qualification_for_property = false:silent
+
+# Language keywords vs BCL types preferences
+dotnet_style_predefined_type_for_locals_parameters_members = true:silent
+dotnet_style_predefined_type_for_member_access = true:silent
+
+# Parentheses preferences
+dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:silent
+dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:silent
+dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent
+dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:silent
+
+# Modifier preferences
+dotnet_style_require_accessibility_modifiers = for_non_interface_members:silent
+
+# Expression-level preferences
+dotnet_style_coalesce_expression = true:suggestion
+dotnet_style_collection_initializer = true:suggestion
+dotnet_style_explicit_tuple_names = true:suggestion
+dotnet_style_null_propagation = true:suggestion
+dotnet_style_object_initializer = true:suggestion
+dotnet_style_prefer_auto_properties = true:silent
+dotnet_style_prefer_compound_assignment = true:suggestion
+dotnet_style_prefer_conditional_expression_over_assignment = true:silent
+dotnet_style_prefer_conditional_expression_over_return = true:silent
+dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion
+dotnet_style_prefer_inferred_tuple_names = true:suggestion
+dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion
+dotnet_style_prefer_simplified_interpolation = true:suggestion
+
+# Field preferences
+dotnet_style_readonly_field = true:suggestion
+
+# Parameter preferences
+dotnet_code_quality_unused_parameters = all:silent
+
+#### C# Coding Conventions ####
+
+# Namespace preferences
+csharp_style_namespace_declarations = block_scoped:warning
+resharper_csharp_namespace_body = block_scoped
+
+# var preferences
+csharp_style_var_elsewhere = false:silent
+csharp_style_var_for_built_in_types = false:silent
+csharp_style_var_when_type_is_apparent = false:silent
+
+# Expression-bodied members
+csharp_style_expression_bodied_accessors = true:silent
+csharp_style_expression_bodied_constructors = false:silent
+csharp_style_expression_bodied_indexers = true:silent
+csharp_style_expression_bodied_lambdas = true:silent
+csharp_style_expression_bodied_local_functions = false:silent
+csharp_style_expression_bodied_methods = false:silent
+csharp_style_expression_bodied_operators = false:silent
+csharp_style_expression_bodied_properties = true:silent
+
+# Pattern matching preferences
+csharp_style_pattern_matching_over_as_with_null_check = true:suggestion
+csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion
+csharp_style_prefer_switch_expression = false:silent
+
+# Null-checking preferences
+csharp_style_conditional_delegate_call = true:suggestion
+
+# Modifier preferences
+csharp_prefer_static_local_function = true:suggestion
+csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:silent
+csharp_style_prefer_readonly_struct = true
+csharp_style_prefer_method_group_conversion = true
+
+# Code-block preferences
+csharp_prefer_braces = true:silent
+csharp_prefer_simple_using_statement = true:suggestion
+
+# Expression-level preferences
+csharp_prefer_simple_default_expression = true:suggestion
+csharp_style_deconstructed_variable_declaration = true:suggestion
+csharp_style_inlined_variable_declaration = true:suggestion
+csharp_style_pattern_local_over_anonymous_function = true:suggestion
+csharp_style_prefer_index_operator = true:suggestion
+csharp_style_prefer_range_operator = true:suggestion
+csharp_style_throw_expression = true:suggestion
+csharp_style_unused_value_assignment_preference = discard_variable:suggestion
+csharp_style_unused_value_expression_statement_preference = discard_variable:silent
+csharp_style_implicit_object_creation_when_type_is_apparent = true
+
+# 'using' directive preferences
+csharp_using_directive_placement = outside_namespace:silent
+
+#### C# Formatting Rules ####
+
+# New line preferences
+csharp_new_line_before_catch = true
+csharp_new_line_before_else = true
+csharp_new_line_before_finally = true
+csharp_new_line_before_members_in_anonymous_types = true
+csharp_new_line_before_members_in_object_initializers = true
+csharp_new_line_before_open_brace = all
+csharp_new_line_between_query_expression_clauses = true
+
+# Indentation preferences
+csharp_indent_block_contents = true
+csharp_indent_braces = false
+csharp_indent_case_contents = true
+csharp_indent_case_contents_when_block = true
+csharp_indent_labels = one_less_than_current
+csharp_indent_switch_labels = true
+
+# Space preferences
+csharp_space_after_cast = false
+csharp_space_after_colon_in_inheritance_clause = true
+csharp_space_after_comma = true
+csharp_space_after_dot = false
+csharp_space_after_keywords_in_control_flow_statements = true
+csharp_space_after_semicolon_in_for_statement = true
+csharp_space_around_binary_operators = before_and_after
+csharp_space_before_colon_in_inheritance_clause = true
+csharp_space_before_comma = false
+csharp_space_before_dot = false
+csharp_space_before_open_square_brackets = false
+csharp_space_before_semicolon_in_for_statement = false
+csharp_space_between_empty_square_brackets = false
+csharp_space_between_method_call_empty_parameter_list_parentheses = false
+csharp_space_between_method_call_name_and_opening_parenthesis = false
+csharp_space_between_method_call_parameter_list_parentheses = false
+csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
+csharp_space_between_method_declaration_name_and_open_parenthesis = false
+csharp_space_between_method_declaration_parameter_list_parentheses = false
+csharp_space_between_parentheses = false
+csharp_space_between_square_brackets = false
+
+# Wrapping preferences
+csharp_preserve_single_line_blocks = true
+csharp_preserve_single_line_statements = false
+
+#### Naming styles ####
+
+# Naming rules
+
+dotnet_naming_rule.interfaces_should_be_prefixed_with_I.severity = suggestion
+dotnet_naming_rule.interfaces_should_be_prefixed_with_I.symbols = interface
+dotnet_naming_rule.interfaces_should_be_prefixed_with_I.style = IPascalCase
+
+dotnet_naming_rule.types_should_be_pascal_case.severity = suggestion
+dotnet_naming_rule.types_should_be_pascal_case.symbols = types
+dotnet_naming_rule.types_should_be_pascal_case.style = PascalCase
+
+dotnet_naming_rule.non_field_members_should_be_pascal_case.severity = suggestion
+dotnet_naming_rule.non_field_members_should_be_pascal_case.symbols = non_field_members
+dotnet_naming_rule.non_field_members_should_be_pascal_case.style = PascalCase
+
+dotnet_naming_rule.private_static_readonly_fields_should_be_camel_case_and_prefixed_with__.symbols = private_static_readonly_fields
+dotnet_naming_rule.private_static_readonly_fields_should_be_camel_case_and_prefixed_with__.severity = suggestion
+dotnet_naming_rule.private_static_readonly_fields_should_be_camel_case_and_prefixed_with__.style = _camelCase
+
+dotnet_naming_rule.local_constants_should_be_pascal_case.symbols = local_constants
+dotnet_naming_rule.local_constants_should_be_pascal_case.severity = suggestion
+dotnet_naming_rule.local_constants_should_be_pascal_case.style = PascalCase
+
+# Symbol specifications
+
+dotnet_naming_symbols.interface.applicable_kinds = interface
+dotnet_naming_symbols.interface.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
+dotnet_naming_symbols.interface.required_modifiers =
+
+dotnet_naming_symbols.types.applicable_kinds = class, struct, interface, enum
+dotnet_naming_symbols.types.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
+dotnet_naming_symbols.types.required_modifiers =
+
+dotnet_naming_symbols.non_field_members.applicable_kinds = property, event, method
+dotnet_naming_symbols.non_field_members.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
+dotnet_naming_symbols.non_field_members.required_modifiers =
+
+dotnet_naming_symbols.private_static_readonly_fields.applicable_kinds = field
+dotnet_naming_symbols.private_static_readonly_fields.applicable_accessibilities = private
+dotnet_naming_symbols.private_static_readonly_fields.required_modifiers = static, readonly
+
+dotnet_naming_symbols.local_constants.applicable_kinds = local
+dotnet_naming_symbols.local_constants.applicable_accessibilities = local
+dotnet_naming_symbols.local_constants.required_modifiers = const
+
+# Naming styles
+
+dotnet_naming_style._camelCase.required_prefix = _
+dotnet_naming_style._camelCase.required_suffix =
+dotnet_naming_style._camelCase.word_separator =
+dotnet_naming_style._camelCase.capitalization = camel_case
+
+dotnet_naming_style.PascalCase.required_prefix =
+dotnet_naming_style.PascalCase.required_suffix =
+dotnet_naming_style.PascalCase.word_separator =
+dotnet_naming_style.PascalCase.capitalization = pascal_case
+
+dotnet_naming_style.IPascalCase.required_prefix = I
+dotnet_naming_style.IPascalCase.required_suffix =
+dotnet_naming_style.IPascalCase.word_separator =
+dotnet_naming_style.IPascalCase.capitalization = pascal_case
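+
+# Illustrative (hypothetical) C# snippet that satisfies the naming rules above:
+#
+#   public interface IAudioBackend { }                       // interfaces: IPascalCase
+#   public class AudioRenderer                                // types: PascalCase
+#   {
+#       private static readonly object _updateLock = new();  // private static readonly fields: _camelCase
+#       public void Start() { }                               // non-field members: PascalCase
+#   }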
+
+# TODO:
+# .NET 8 migration (new warnings are raised by the .NET 8 C# compiler and analyzers).
+# The following info messages might be better fixed in the source code than suppressed here.
+# Without the following lines, dotnet format would fail.
+# Disable "Collection initialization can be simplified"
+dotnet_diagnostic.IDE0028.severity = none
+dotnet_diagnostic.IDE0300.severity = none
+dotnet_diagnostic.IDE0301.severity = none
+dotnet_diagnostic.IDE0302.severity = none
+dotnet_diagnostic.IDE0305.severity = none
+# Disable "'new' expression can be simplified"
+dotnet_diagnostic.IDE0090.severity = none
+# Disable "Use primary constructor"
+dotnet_diagnostic.IDE0290.severity = none
+# Disable "Member '' does not access instance data and can be marked as static"
+dotnet_diagnostic.CA1822.severity = none
+# Disable "Change type of field '' from '' to '' for improved performance"
+dotnet_diagnostic.CA1859.severity = none
+# Disable "Prefer 'static readonly' fields over constant array arguments if the called method is called repeatedly and is not mutating the passed array"
+dotnet_diagnostic.CA1861.severity = none
+# Disable "Prefer using 'string.Equals(string, StringComparison)' to perform a case-insensitive comparison, but keep in mind that this might cause subtle changes in behavior, so make sure to conduct thorough testing after applying the suggestion, or if culturally sensitive comparison is not required, consider using 'StringComparison.OrdinalIgnoreCase'"
+dotnet_diagnostic.CA1862.severity = none
+
+[src/Ryujinx/UI/ViewModels/**.cs]
+# Disable "mark members as static" rule for ViewModels
+dotnet_diagnostic.CA1822.severity = none
+
+[src/Ryujinx.HLE/HOS/Services/**.cs]
+# Disable "mark members as static" rule for services
+dotnet_diagnostic.CA1822.severity = none
+
+[src/Ryujinx.Tests/Cpu/*.cs]
+# Disable naming rules for CPU tests
+dotnet_diagnostic.IDE1006.severity = none
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..e39a7f1
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,4 @@
+###############################################################################
+# Set default behavior to automatically normalize line endings.
+###############################################################################
+* text=auto eol=lf
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..37b419d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,175 @@
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+
+# User-specific files
+*.suo
+*.user
+*.sln.docstates
+.vs
+.vscode
+
+# Build results
+
+[Dd]ebug/
+[Rr]elease/
+x64/
+build/
+[Bb]in/
+[Oo]bj/
+
+# Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
+!packages/*/build/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+*_i.c
+*_p.c
+*.ilk
+*.meta
+*.obj
+*.pch
+*.pdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*.log
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.log
+*.scc
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opensdf
+*.sdf
+*.cachefile
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+
+# Guidance Automation Toolkit
+*.gpState
+
+# ReSharper is a .NET coding add-in
+_ReSharper*/
+*.[Rr]e[Ss]harper
+
+# TeamCity is a build add-in
+_TeamCity*
+
+# DotCover is a Code Coverage Tool
+*.dotCover
+
+# Rider is a Visual Studio alternative
+.idea/*
+
+# NCrunch
+*.ncrunch*
+.*crunch*.local.xml
+
+# Installshield output folder
+[Ee]xpress/
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish/
+
+# Publish Web Output
+*.Publish.xml
+
+# NuGet Packages Directory
+## TODO: If you have NuGet Package Restore enabled, uncomment the next line
+#packages/
+
+# Windows Azure Build Output
+csx
+*.build.csdef
+
+# Windows Store app package directory
+AppPackages/
+
+# Others
+sql/
+*.Cache
+ClientBin/
+[Ss]tyle[Cc]op.*
+~$*
+*~
+*.dbmdl
+*.[Pp]ublish.xml
+*.pfx
+*.publishsettings
+packages/*
+*.config
+
+# Include nuget.config
+!nuget.config
+
+# RIA/Silverlight projects
+Generated_Code/
+
+# Backup & report files from converting an old project file to a newer
+# Visual Studio version. Backup files are not needed, because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+UpgradeLog*.htm
+
+# SQL Server files
+App_Data/*.mdf
+App_Data/*.ldf
+
+
+#LightSwitch generated files
+GeneratedArtifacts/
+_Pvt_Extensions/
+ModelManifest.xml
+
+# =========================
+# Windows detritus
+# =========================
+
+# Windows image file caches
+Thumbs.db
+ehthumbs.db
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Mac desktop service store files
+.DS_Store
+
+# VS Launch Settings
+launchSettings.json
+
+# NetCore Publishing Profiles
+PublishProfiles/
+
+# Glade backup files
+*.glade~
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..366eb84
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,147 @@
+# Contribution to Ryujinx
+
+You can contribute to Ryujinx by opening PRs, testing PRs, and filing issues. Contributing code and other implementations is greatly appreciated, alongside simply filing issues for problems you encounter.
+Please read the entire document before continuing as it can potentially save everyone involved a significant amount of time.
+
+# Quick Links
+
+* [Code Style Documentation](docs/coding-guidelines/coding-style.md)
+* [Pull Request Guidelines](docs/workflow/pr-guide.md)
+
+## Reporting Issues
+
+We always welcome bug reports, feature proposals and overall feedback. Here are a few tips on how you can make reporting your issue as effective as possible.
+
+### Identify Where to Report
+
+The Ryujinx codebase is distributed across multiple repositories in the [Ryujinx organization](https://github.com/Ryujinx). Depending on the feedback you might want to file the issue on a different repo. Here are a few common repos:
+
+* [Ryujinx/Ryujinx](https://github.com/Ryujinx/Ryujinx) Ryujinx core project files.
+* [Ryujinx/Ryujinx-Games-List](https://github.com/Ryujinx/Ryujinx-Games-List) Ryujinx game compatibility list.
+* [Ryujinx/Ryujinx-Website](https://github.com/Ryujinx/Ryujinx-Website) Ryujinx website source code.
+* [Ryujinx/Ryujinx-Ldn-Website](https://github.com/Ryujinx/Ryujinx-Ldn-Website) Ryujinx LDN website source code.
+
+### Finding Existing Issues
+
+Before filing a new issue, please search our [open issues](https://github.com/Ryujinx/Ryujinx/issues) to check if it already exists.
+
+If you do find an existing issue, please include your own feedback in the discussion. Do consider upvoting (👍 reaction) the original post, as this helps us prioritize popular issues in our backlog.
+
+### Writing a Good Feature Request
+
+Please review any feature requests already opened to both check it has not already been suggested, and to familiarize yourself with the format. When ready to submit a proposal, please use the [Feature Request issue template](https://github.com/Ryujinx/Ryujinx/issues/new?assignees=&labels=&projects=&template=feature_request.yml&title=%5BFeature+Request%5D).
+
+### Writing a Good Bug Report
+
+Good bug reports make it easier for maintainers to verify and root cause the underlying problem. The better a bug report, the faster the problem will be resolved.
+Ideally, a bug report should contain the following information:
+
+* A high-level description of the problem.
+* A _minimal reproduction_, i.e. the smallest time commitment/configuration required to reproduce the wrong behavior. This can be in the form of a small homebrew application, or by providing a save file and reproduction steps for a specific game.
+* A description of the _expected behavior_, contrasted with the _actual behavior_ observed.
+* Information on the environment: OS/distro, CPU, GPU (including driver), RAM etc.
+* A Ryujinx log file of the run instance where the issue occurred. Log files can be found in `[Executable Folder]/Logs` and are named chronologically.
+* Additional information, e.g. is it a regression from previous versions? Are there any known workarounds?
+
+When ready to submit a bug report, please use the [Bug Report issue template](https://github.com/Ryujinx/Ryujinx/issues/new?assignees=&labels=bug&projects=&template=bug_report.yml&title=%5BBug%5D).
+
+## Contributing Changes
+
+Project maintainers will merge changes that both improve the project and meet our standards for code quality.
+
+The [Pull Request Guide](docs/workflow/pr-guide.md) and [License](https://github.com/Ryujinx/Ryujinx/blob/master/LICENSE.txt) docs define additional guidance.
+
+### DOs and DON'Ts
+
+Please do:
+
+* **DO** follow our [coding style](docs/coding-guidelines/coding-style.md) (C# code-specific).
+* **DO** give priority to the current style of the project or file you're changing even if it diverges from the general guidelines.
+* **DO** keep the discussions focused. When a new or related topic comes up
+ it's often better to create a new issue than to sidetrack the discussion.
+* **DO** clearly state on an issue that you are going to take on implementing it.
+* **DO** blog and tweet (or whatever) about your contributions, frequently!
+
+Please do not:
+
+* **DON'T** make PRs for style changes.
+* **DON'T** surprise us with big pull requests. Instead, file an issue and talk with us on Discord to start
+ a discussion so we can agree on a direction before you invest a large amount
+ of time.
+* **DON'T** commit code that you didn't write. If you find code that you think is a good fit to add to Ryujinx, file an issue or talk to us on Discord to start a discussion before proceeding.
+* **DON'T** submit PRs that alter licensing related files or headers. If you believe there's a problem with them, file an issue and we'll be happy to discuss it.
+
+### Suggested Workflow
+
+We use and recommend the following workflow:
+
+1. Create or find an issue for your work.
+ - You can skip this step for trivial changes.
+ - Get agreement from the team and the community that your proposed change is a good one if it is of significant size or changes core functionality.
+ - Clearly state that you are going to take on implementing it, if that's the case. You can request that the issue be assigned to you. Note: The issue filer and the implementer don't have to be the same person.
+2. Create a personal fork of the repository on GitHub (if you don't already have one).
+3. In your fork, create a branch off of main (`git checkout -b mybranch`).
+ - Branches are useful since they isolate your changes from incoming changes from upstream. They also enable you to create multiple PRs from the same fork.
+4. Make and commit your changes to your branch.
+ - [Build Instructions](https://github.com/Ryujinx/Ryujinx#building) explains how to build and test.
+ - Commit messages should be clear statements of action and intent.
+5. Build the repository with your changes.
+ - Make sure that the builds are clean.
+ - Make sure that `dotnet format` has been run and any corrections tested and committed (see the sketch after this list).
+6. Create a pull request (PR) against the Ryujinx/Ryujinx repository's **main** branch.
+ - State in the description what issue or improvement your change is addressing.
+ - Check that all the Continuous Integration checks are passing. Refer to [Actions](https://github.com/Ryujinx/Ryujinx/actions) to check for outstanding errors.
+7. Wait for feedback or approval of your changes from the [core development team](https://github.com/orgs/Ryujinx/teams/developers).
+ - See the [pull request review procedure](docs/workflow/pr-guide.md) for details.
+8. When the team members have signed off, and all checks are green, your PR will be merged.
+ - The next official build will automatically include your change.
+ - You can delete the branch you used for making the change.
+
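+A minimal sketch of this workflow on the command line (the fork URL and branch name below are placeholders):
+
+```
+git clone https://github.com/<your-username>/Ryujinx
+cd Ryujinx
+git checkout -b mybranch
+# make your changes, then verify formatting before committing
+dotnet format
+git commit -am "Summarize change in 50 characters or less"
+git push origin mybranch
+```
+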
+### Good First Issues
+
+The team marks the most straightforward issues as [good first issues](https://github.com/Ryujinx/Ryujinx/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22). This set of issues is the place to start if you are interested in contributing but new to the codebase.
+
+### Commit Messages
+
+Please format commit messages as follows (based on [A Note About Git Commit Messages](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html)):
+
+```
+Summarize change in 50 characters or less
+
+Provide more detail after the first line. Leave one blank line below the
+summary and wrap all lines at 72 characters or less.
+
+If the change fixes an issue, leave another blank line after the final
+paragraph and indicate which issue is fixed in the specific format
+below.
+
+Fix #42
+```
+
+Also do your best to factor commits appropriately, not too large with unrelated things in the same commit, and not too small with the same small change applied N times in N different commits.
+
+### PR - CI Process
+
+The [Ryujinx continuous integration](https://github.com/Ryujinx/Ryujinx/actions) (CI) system will automatically perform the required builds and run tests (including the ones you are expected to run) for PRs. Builds and test runs must be clean or have bugs properly filed against flaky/unexpected failures that are unrelated to your change.
+
+If the CI build fails for any reason, the PR actions tab should be consulted for further information on the failure. There are a few usual suspects for such a failure:
+* `dotnet format` has not been run on the PR and has outstanding stylistic issues (a local check is shown below).
+* There is an error within the PR that fails a test or errors the compiler.
+* Random failure of the workflow can occasionally result in a CI failure. In this scenario a maintainer will manually restart the job.
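+
+To catch the most common of these failures locally, you can run the formatter in verification mode before pushing (assuming the .NET SDK is installed):
+
+```
+dotnet format --verify-no-changes
+```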
+
+### PR Feedback
+
+Ryujinx team and community members will provide feedback on your change. Community feedback is highly valued. You may see the absence of team feedback if the community has already provided good review feedback.
+
+Two Ryujinx team members must review and approve every PR prior to merge. They will often reply with "LGTM, see nit". That means that the PR will be merged once the feedback is resolved. "LGTM" == "looks good to me".
+
+There are lots of thoughts and [approaches](https://github.com/antlr/antlr4-cpp/blob/master/CONTRIBUTING.md#emoji) for how to efficiently discuss changes. It is best to be clear and explicit with your feedback. Please be patient with people who might not understand the finer details about your approach to feedback.
+
+#### Copying Changes from Other Projects
+
+Ryujinx uses some implementations and frameworks from other projects. The following rules must be followed for PRs that include changes from another project:
+
+- The license of the file is [permissive](https://en.wikipedia.org/wiki/Permissive_free_software_licence).
+- The license of the file is left intact.
+- The contribution is correctly attributed in the [3rd party notices](https://github.com/Ryujinx/Ryujinx/blob/master/distribution/legal/THIRDPARTY.md) file in the repository, as needed.
+
diff --git a/Directory.Packages.props b/Directory.Packages.props
new file mode 100644
index 0000000..301024c
--- /dev/null
+++ b/Directory.Packages.props
@@ -0,0 +1,52 @@
+<Project>
+  <PropertyGroup>
+    <ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
+  </PropertyGroup>
+  <ItemGroup>
+    <!-- PackageVersion entries elided -->
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..818ddd7
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,9 @@
+MIT License
+
+Copyright (c) Ryujinx Team and Contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7f2294d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,170 @@
+# Ryujinx
+
+(REE-YOU-JINX)
+
+Ryujinx is an open-source Nintendo Switch emulator, created by gdkchan, written in C#.
+This emulator aims to provide excellent accuracy and performance, a user-friendly interface, and consistent builds.
+It was written from scratch and development on the project began in September 2017.
+Ryujinx is available on GitHub under the MIT license.
+
+## Compatibility
+
+As of May 2024, Ryujinx has been tested on approximately 4,300 titles;
+over 4,100 boot past menus and into gameplay, with roughly 3,550 of those being considered playable.
+
+You can check out the compatibility list [here](https://github.com/Ryujinx/Ryujinx-Games-List/issues).
+
+Anyone is free to submit a new game test or update an existing game test entry;
+simply follow the new issue template and testing guidelines, or post as a reply to the applicable game issue.
+Use the search function to see if a game has been tested already!
+
+## Usage
+
+To run this emulator, your PC must be equipped with at least 8GiB of RAM;
+failing to meet this requirement may result in a poor gameplay experience or unexpected crashes.
+
+See our [Setup & Configuration Guide](https://github.com/Ryujinx/Ryujinx/wiki/Ryujinx-Setup-&-Configuration-Guide) on how to set up the emulator.
+
+For our Local Wireless (LDN) builds, see our [Multiplayer: Local Play/Local Wireless Guide](https://github.com/Ryujinx/Ryujinx/wiki/Multiplayer-(LDN-Local-Wireless)-Guide).
+
+Avalonia UI comes with translations for various languages. See [Crowdin](https://crwd.in/ryujinx) for more information.
+
+## Latest build
+
+These builds are compiled automatically for each commit on the master branch.
+While we strive to ensure optimal stability and performance prior to pushing an update, our automated builds **may be unstable or completely broken**.
+
+If you want to see details on updates to the emulator, you can visit our [Changelog](https://github.com/Ryujinx/Ryujinx/wiki/Changelog).
+
+The latest automatic build for Windows, macOS, and Linux can be found on the [Official Website](https://ryujinx.org/download).
+
+## Documentation
+
+If you are planning to contribute or just want to learn more about this project, please read through our [documentation](docs/README.md).
+
+## Building
+
+If you wish to build the emulator yourself, follow these steps:
+
+### Step 1
+
+Install the [.NET 8.0 (or higher) SDK](https://dotnet.microsoft.com/download/dotnet/8.0).
+Make sure your SDK version is higher than or equal to the required version specified in [global.json](global.json).
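+You can check the SDK versions installed on your machine with:
+
+```
+dotnet --list-sdks
+```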
+
+### Step 2
+
+Either use `git clone https://github.com/Ryujinx/Ryujinx` on the command line to clone the repository, or use the Code --> Download ZIP button on the repository page to get the files.
+
+### Step 3
+
+To build Ryujinx, open a command prompt inside the project directory.
+You can quickly access it on Windows by holding Shift in File Explorer, then right-clicking and selecting `Open command window here`.
+Then type the following command: `dotnet build -c Release -o build`.
+The built files will be found in the newly created `build` directory.
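+
+Putting the steps together, a full build from a fresh clone looks like this (output path per the command above):
+
+```
+git clone https://github.com/Ryujinx/Ryujinx
+cd Ryujinx
+dotnet build -c Release -o build
+```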
+
+Ryujinx system files are stored in the `Ryujinx` folder.
+This folder is located in the user folder, which can be accessed by clicking `Open Ryujinx Folder` under the File menu in the GUI.
+
+## Features
+
+- **Audio**
+
+ Audio output is entirely supported; audio input (microphone) isn't.
+ We use C# wrappers for [OpenAL](https://openal-soft.org/), and [SDL2](https://www.libsdl.org/) & [libsoundio](http://libsound.io/) as fallbacks.
+
+- **CPU**
+
+ The CPU emulator, ARMeilleure, emulates an ARMv8 CPU and currently has support for most 64-bit ARMv8 and some of the ARMv7 (and older) instructions, including partial 32-bit support.
+ It translates the ARM code to a custom IR, performs a few optimizations, and turns that into x86 code.
+ There are three memory manager options available depending on the user's preference, leveraging both software-based (slower) and host-mapped modes (much faster).
+ The fastest option (host, unchecked) is set by default.
+ Ryujinx also features an optional Profiled Persistent Translation Cache, which essentially caches translated functions so that they do not need to be translated every time the game loads.
+ The net result is a significant reduction in load times (the amount of time between launching a game and arriving at the title screen) for nearly every game.
+ NOTE: This feature is enabled by default in the Options menu > System tab.
+ You must launch the game at least twice to the title screen or beyond before performance improvements are unlocked on the third launch!
+ These improvements are permanent and do not require any extra launches going forward.
+
+- **GPU**
+
+ The GPU emulator emulates the Switch's Maxwell GPU using either the OpenGL (version 4.5 minimum), Vulkan, or Metal (via MoltenVK) APIs, through a custom build of OpenTK (for OpenGL) or Silk.NET (for Vulkan and Metal).
+ There are currently six graphics enhancements available to the end user in Ryujinx: Disk Shader Caching, Resolution Scaling, Anti-Aliasing, Scaling Filters (including FSR), Anisotropic Filtering and Aspect Ratio Adjustment.
+ These enhancements can be adjusted or toggled as desired in the GUI.
+
+- **Input**
+
+ We currently have support for keyboard, mouse, touch input, JoyCon input, and nearly all controllers.
+ Motion controls are natively supported in most cases; for dual-JoyCon motion support, DS4Windows or BetterJoy are currently required.
+ In all scenarios, you can set up everything inside the input configuration menu.
+
+- **DLC & Modifications**
+
+ Ryujinx is able to manage add-on content/downloadable content through the GUI.
+ Mods (romfs, exefs, and runtime mods such as cheats) are also supported;
+ the GUI contains a shortcut to open the respective mods folder for a particular game.
+
+- **Configuration**
+
+ The emulator has settings for enabling or disabling some logging, remapping controllers, and more.
+ You can configure all of them through the graphical interface or manually through the config file, `Config.json`, found in the user folder which can be accessed by clicking `Open Ryujinx Folder` under the File menu in the GUI.
+
+## Contact
+
+If you have contributions, suggestions, need emulator support or just want to get in touch with the team, join our [Discord server](https://discord.com/invite/Ryujinx).
+You may also review our [FAQ](https://github.com/Ryujinx/Ryujinx/wiki/Frequently-Asked-Questions).
+
+## Donations
+
+If you'd like to support the project financially, Ryujinx has an active Patreon campaign.
+
+All developers working on the project do so in their free time, but the project has several expenses:
+* Hackable Nintendo Switch consoles to reverse-engineer the hardware
+* Additional computer hardware for testing purposes (e.g. GPUs to diagnose graphical bugs, etc.)
+* Licenses for various software development tools (e.g. Jetbrains, IDA)
+* Web hosting and infrastructure maintenance (e.g. LDN servers)
+
+All funds received through Patreon are considered a donation to support the project. Patrons receive early access to progress reports and exclusive access to developer interviews.
+
+## License
+
+This software is licensed under the terms of the [MIT license](LICENSE.txt).
+This project makes use of code authored by the libvpx project, licensed under BSD, and the ffmpeg project, licensed under LGPLv3.
+See [LICENSE.txt](LICENSE.txt) and [THIRDPARTY.md](distribution/legal/THIRDPARTY.md) for more details.
+
+## Credits
+
+- [LibHac](https://github.com/Thealexbarney/LibHac) is used for our file-system.
+- [AmiiboAPI](https://www.amiiboapi.com) is used in our Amiibo emulation.
+- [ldn_mitm](https://github.com/spacemeowx2/ldn_mitm) is used for one of our available multiplayer modes.
+- [ShellLink](https://github.com/securifybv/ShellLink) is used for Windows shortcut generation.
diff --git a/Ryujinx.sln b/Ryujinx.sln
new file mode 100644
index 0000000..76ebd57
--- /dev/null
+++ b/Ryujinx.sln
@@ -0,0 +1,265 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.1.32228.430
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Gtk3", "src\Ryujinx.Gtk3\Ryujinx.Gtk3.csproj", "{074045D4-3ED2-4711-9169-E385F2BFB5A0}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Tests", "src\Ryujinx.Tests\Ryujinx.Tests.csproj", "{EBB55AEA-C7D7-4DEB-BF96-FA1789E225E9}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Tests.Unicorn", "src\Ryujinx.Tests.Unicorn\Ryujinx.Tests.Unicorn.csproj", "{D8F72938-78EF-4E8C-BAFE-531C9C3C8F15}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.HLE", "src\Ryujinx.HLE\Ryujinx.HLE.csproj", "{CB92CFF9-1D62-4D4F-9E88-8130EF61E351}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.ShaderTools", "src\Ryujinx.ShaderTools\Ryujinx.ShaderTools.csproj", "{3AB294D0-2230-468F-9EB3-BDFCAEAE99A5}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Common", "src\Ryujinx.Common\Ryujinx.Common.csproj", "{5FD4E4F6-8928-4B3C-BE07-28A675C17226}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ARMeilleure", "src\ARMeilleure\ARMeilleure.csproj", "{ABF09A5E-2D8B-4B6F-A51D-5CE414DDB15A}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Gpu", "src\Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj", "{ADA7EA87-0D63-4D97-9433-922A2124401F}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.GAL", "src\Ryujinx.Graphics.GAL\Ryujinx.Graphics.GAL.csproj", "{A602AE97-91A5-4608-8DF1-EBF4ED7A0B9E}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.OpenGL", "src\Ryujinx.Graphics.OpenGL\Ryujinx.Graphics.OpenGL.csproj", "{9558FB96-075D-4219-8FFF-401979DC0B69}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Texture", "src\Ryujinx.Graphics.Texture\Ryujinx.Graphics.Texture.csproj", "{E1B1AD28-289D-47B7-A106-326972240207}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Shader", "src\Ryujinx.Graphics.Shader\Ryujinx.Graphics.Shader.csproj", "{03B955CD-AD84-4B93-AAA7-BF17923BBAA5}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Nvdec", "src\Ryujinx.Graphics.Nvdec\Ryujinx.Graphics.Nvdec.csproj", "{85A0FA56-DC01-4A42-8808-70DAC76BD66D}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Audio", "src\Ryujinx.Audio\Ryujinx.Audio.csproj", "{806ACF6D-90B0-45D0-A1AC-5F220F3B3985}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{36F870C1-3E5F-485F-B426-F0645AF78751}"
+ ProjectSection(SolutionItems) = preProject
+ .editorconfig = .editorconfig
+ Directory.Packages.props = Directory.Packages.props
+ EndProjectSection
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Memory", "src\Ryujinx.Memory\Ryujinx.Memory.csproj", "{A5E6C691-9E22-4263-8F40-42F002CE66BE}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Tests.Memory", "src\Ryujinx.Tests.Memory\Ryujinx.Tests.Memory.csproj", "{D1CC5322-7325-4F6B-9625-194B30BE1296}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Cpu", "src\Ryujinx.Cpu\Ryujinx.Cpu.csproj", "{3DF35E3D-D844-4399-A9A1-A9E923264C17}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Device", "src\Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj", "{C3002C3C-7B09-4FE7-894A-372EDA22FC6E}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Host1x", "src\Ryujinx.Graphics.Host1x\Ryujinx.Graphics.Host1x.csproj", "{C35F1536-7DE5-4F9D-9604-B5B4E1561947}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Nvdec.Vp9", "src\Ryujinx.Graphics.Nvdec.Vp9\Ryujinx.Graphics.Nvdec.Vp9.csproj", "{B9AECA11-E248-4886-A10B-81B631CAAF29}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Vic", "src\Ryujinx.Graphics.Vic\Ryujinx.Graphics.Vic.csproj", "{81BB2C11-9408-4EA3-822E-42987AF54429}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Video", "src\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj", "{FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Audio.Backends.OpenAL", "src\Ryujinx.Audio.Backends.OpenAL\Ryujinx.Audio.Backends.OpenAL.csproj", "{0BE11899-DF2D-4BDE-B9EE-2489E8D35E7D}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Audio.Backends.SoundIo", "src\Ryujinx.Audio.Backends.SoundIo\Ryujinx.Audio.Backends.SoundIo.csproj", "{716364DE-B988-41A6-BAB4-327964266ECC}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Input", "src\Ryujinx.Input\Ryujinx.Input.csproj", "{C16F112F-38C3-40BC-9F5F-4791112063D6}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Input.SDL2", "src\Ryujinx.Input.SDL2\Ryujinx.Input.SDL2.csproj", "{DFAB6F2D-B9BF-4AFF-B22B-7684A328EBA3}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.SDL2.Common", "src\Ryujinx.SDL2.Common\Ryujinx.SDL2.Common.csproj", "{2D5D3A1D-5730-4648-B0AB-06C53CB910C0}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Audio.Backends.SDL2", "src\Ryujinx.Audio.Backends.SDL2\Ryujinx.Audio.Backends.SDL2.csproj", "{D99A395A-8569-4DB0-B336-900647890052}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Headless.SDL2", "src\Ryujinx.Headless.SDL2\Ryujinx.Headless.SDL2.csproj", "{390DC343-5CB4-4C79-A5DD-E3ED235E4C49}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Nvdec.FFmpeg", "src\Ryujinx.Graphics.Nvdec.FFmpeg\Ryujinx.Graphics.Nvdec.FFmpeg.csproj", "{BEE1C184-C9A4-410B-8DFC-FB74D5C93AEB}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx", "src\Ryujinx\Ryujinx.csproj", "{7C1B2721-13DA-4B62-B046-C626605ECCE6}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.UI.Common", "src\Ryujinx.UI.Common\Ryujinx.UI.Common.csproj", "{BA161CA0-CD65-4E6E-B644-51C8D1E542DC}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Horizon.Generators", "src\Ryujinx.Horizon.Generators\Ryujinx.Horizon.Generators.csproj", "{6AE2A5E8-4C5A-48B9-997B-E1455C0355C6}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Vulkan", "src\Ryujinx.Graphics.Vulkan\Ryujinx.Graphics.Vulkan.csproj", "{D4D09B08-D580-4D69-B886-C35D2853F6C8}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Spv.Generator", "src\Spv.Generator\Spv.Generator.csproj", "{2BCB3D7A-38C0-4FE7-8FDA-374C6AD56D0E}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.UI.LocaleGenerator", "src\Ryujinx.UI.LocaleGenerator\Ryujinx.UI.LocaleGenerator.csproj", "{77D01AD9-2C98-478E-AE1D-8F7100738FB4}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Horizon.Common", "src\Ryujinx.Horizon.Common\Ryujinx.Horizon.Common.csproj", "{77F96ECE-4952-42DB-A528-DED25572A573}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Horizon", "src\Ryujinx.Horizon\Ryujinx.Horizon.csproj", "{AF34127A-3A92-43E5-8496-14960A50B1F1}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Horizon.Kernel.Generators", "src\Ryujinx.Horizon.Kernel.Generators\Ryujinx.Horizon.Kernel.Generators.csproj", "{7F55A45D-4E1D-4A36-ADD3-87F29A285AA2}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.HLE.Generators", "src\Ryujinx.HLE.Generators\Ryujinx.HLE.Generators.csproj", "{B575BCDE-2FD8-4A5D-8756-31CDD7FE81F0}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {074045D4-3ED2-4711-9169-E385F2BFB5A0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {074045D4-3ED2-4711-9169-E385F2BFB5A0}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {074045D4-3ED2-4711-9169-E385F2BFB5A0}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {074045D4-3ED2-4711-9169-E385F2BFB5A0}.Release|Any CPU.Build.0 = Release|Any CPU
+ {EBB55AEA-C7D7-4DEB-BF96-FA1789E225E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {EBB55AEA-C7D7-4DEB-BF96-FA1789E225E9}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {EBB55AEA-C7D7-4DEB-BF96-FA1789E225E9}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {EBB55AEA-C7D7-4DEB-BF96-FA1789E225E9}.Release|Any CPU.Build.0 = Release|Any CPU
+ {D8F72938-78EF-4E8C-BAFE-531C9C3C8F15}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D8F72938-78EF-4E8C-BAFE-531C9C3C8F15}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D8F72938-78EF-4E8C-BAFE-531C9C3C8F15}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D8F72938-78EF-4E8C-BAFE-531C9C3C8F15}.Release|Any CPU.Build.0 = Release|Any CPU
+ {CB92CFF9-1D62-4D4F-9E88-8130EF61E351}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {CB92CFF9-1D62-4D4F-9E88-8130EF61E351}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {CB92CFF9-1D62-4D4F-9E88-8130EF61E351}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {CB92CFF9-1D62-4D4F-9E88-8130EF61E351}.Release|Any CPU.Build.0 = Release|Any CPU
+ {3AB294D0-2230-468F-9EB3-BDFCAEAE99A5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {3AB294D0-2230-468F-9EB3-BDFCAEAE99A5}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {3AB294D0-2230-468F-9EB3-BDFCAEAE99A5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {3AB294D0-2230-468F-9EB3-BDFCAEAE99A5}.Release|Any CPU.Build.0 = Release|Any CPU
+ {5FD4E4F6-8928-4B3C-BE07-28A675C17226}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {5FD4E4F6-8928-4B3C-BE07-28A675C17226}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {5FD4E4F6-8928-4B3C-BE07-28A675C17226}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {5FD4E4F6-8928-4B3C-BE07-28A675C17226}.Release|Any CPU.Build.0 = Release|Any CPU
+ {ABF09A5E-2D8B-4B6F-A51D-5CE414DDB15A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {ABF09A5E-2D8B-4B6F-A51D-5CE414DDB15A}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {ABF09A5E-2D8B-4B6F-A51D-5CE414DDB15A}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {ABF09A5E-2D8B-4B6F-A51D-5CE414DDB15A}.Release|Any CPU.Build.0 = Release|Any CPU
+ {ADA7EA87-0D63-4D97-9433-922A2124401F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {ADA7EA87-0D63-4D97-9433-922A2124401F}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {ADA7EA87-0D63-4D97-9433-922A2124401F}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {ADA7EA87-0D63-4D97-9433-922A2124401F}.Release|Any CPU.Build.0 = Release|Any CPU
+ {A602AE97-91A5-4608-8DF1-EBF4ED7A0B9E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {A602AE97-91A5-4608-8DF1-EBF4ED7A0B9E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {A602AE97-91A5-4608-8DF1-EBF4ED7A0B9E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {A602AE97-91A5-4608-8DF1-EBF4ED7A0B9E}.Release|Any CPU.Build.0 = Release|Any CPU
+ {9558FB96-075D-4219-8FFF-401979DC0B69}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {9558FB96-075D-4219-8FFF-401979DC0B69}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {9558FB96-075D-4219-8FFF-401979DC0B69}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {9558FB96-075D-4219-8FFF-401979DC0B69}.Release|Any CPU.Build.0 = Release|Any CPU
+ {E1B1AD28-289D-47B7-A106-326972240207}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {E1B1AD28-289D-47B7-A106-326972240207}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {E1B1AD28-289D-47B7-A106-326972240207}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {E1B1AD28-289D-47B7-A106-326972240207}.Release|Any CPU.Build.0 = Release|Any CPU
+ {03B955CD-AD84-4B93-AAA7-BF17923BBAA5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {03B955CD-AD84-4B93-AAA7-BF17923BBAA5}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {03B955CD-AD84-4B93-AAA7-BF17923BBAA5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {03B955CD-AD84-4B93-AAA7-BF17923BBAA5}.Release|Any CPU.Build.0 = Release|Any CPU
+ {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Release|Any CPU.Build.0 = Release|Any CPU
+ {806ACF6D-90B0-45D0-A1AC-5F220F3B3985}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {806ACF6D-90B0-45D0-A1AC-5F220F3B3985}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {806ACF6D-90B0-45D0-A1AC-5F220F3B3985}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {806ACF6D-90B0-45D0-A1AC-5F220F3B3985}.Release|Any CPU.Build.0 = Release|Any CPU
+ {A5E6C691-9E22-4263-8F40-42F002CE66BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {A5E6C691-9E22-4263-8F40-42F002CE66BE}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {A5E6C691-9E22-4263-8F40-42F002CE66BE}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {A5E6C691-9E22-4263-8F40-42F002CE66BE}.Release|Any CPU.Build.0 = Release|Any CPU
+ {D1CC5322-7325-4F6B-9625-194B30BE1296}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D1CC5322-7325-4F6B-9625-194B30BE1296}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D1CC5322-7325-4F6B-9625-194B30BE1296}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D1CC5322-7325-4F6B-9625-194B30BE1296}.Release|Any CPU.Build.0 = Release|Any CPU
+ {3DF35E3D-D844-4399-A9A1-A9E923264C17}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {3DF35E3D-D844-4399-A9A1-A9E923264C17}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {3DF35E3D-D844-4399-A9A1-A9E923264C17}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {3DF35E3D-D844-4399-A9A1-A9E923264C17}.Release|Any CPU.Build.0 = Release|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Release|Any CPU.Build.0 = Release|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Release|Any CPU.Build.0 = Release|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {B9AECA11-E248-4886-A10B-81B631CAAF29}.Release|Any CPU.Build.0 = Release|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {81BB2C11-9408-4EA3-822E-42987AF54429}.Release|Any CPU.Build.0 = Release|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Release|Any CPU.Build.0 = Release|Any CPU
+ {0BE11899-DF2D-4BDE-B9EE-2489E8D35E7D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {0BE11899-DF2D-4BDE-B9EE-2489E8D35E7D}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {0BE11899-DF2D-4BDE-B9EE-2489E8D35E7D}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {0BE11899-DF2D-4BDE-B9EE-2489E8D35E7D}.Release|Any CPU.Build.0 = Release|Any CPU
+ {716364DE-B988-41A6-BAB4-327964266ECC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {716364DE-B988-41A6-BAB4-327964266ECC}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {716364DE-B988-41A6-BAB4-327964266ECC}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {716364DE-B988-41A6-BAB4-327964266ECC}.Release|Any CPU.Build.0 = Release|Any CPU
+ {C16F112F-38C3-40BC-9F5F-4791112063D6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {C16F112F-38C3-40BC-9F5F-4791112063D6}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {C16F112F-38C3-40BC-9F5F-4791112063D6}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {C16F112F-38C3-40BC-9F5F-4791112063D6}.Release|Any CPU.Build.0 = Release|Any CPU
+ {DFAB6F2D-B9BF-4AFF-B22B-7684A328EBA3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {DFAB6F2D-B9BF-4AFF-B22B-7684A328EBA3}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {DFAB6F2D-B9BF-4AFF-B22B-7684A328EBA3}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {DFAB6F2D-B9BF-4AFF-B22B-7684A328EBA3}.Release|Any CPU.Build.0 = Release|Any CPU
+ {2D5D3A1D-5730-4648-B0AB-06C53CB910C0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {2D5D3A1D-5730-4648-B0AB-06C53CB910C0}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {2D5D3A1D-5730-4648-B0AB-06C53CB910C0}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {2D5D3A1D-5730-4648-B0AB-06C53CB910C0}.Release|Any CPU.Build.0 = Release|Any CPU
+ {D99A395A-8569-4DB0-B336-900647890052}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D99A395A-8569-4DB0-B336-900647890052}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D99A395A-8569-4DB0-B336-900647890052}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D99A395A-8569-4DB0-B336-900647890052}.Release|Any CPU.Build.0 = Release|Any CPU
+ {390DC343-5CB4-4C79-A5DD-E3ED235E4C49}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {390DC343-5CB4-4C79-A5DD-E3ED235E4C49}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {390DC343-5CB4-4C79-A5DD-E3ED235E4C49}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {390DC343-5CB4-4C79-A5DD-E3ED235E4C49}.Release|Any CPU.Build.0 = Release|Any CPU
+ {BEE1C184-C9A4-410B-8DFC-FB74D5C93AEB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {BEE1C184-C9A4-410B-8DFC-FB74D5C93AEB}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {BEE1C184-C9A4-410B-8DFC-FB74D5C93AEB}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {BEE1C184-C9A4-410B-8DFC-FB74D5C93AEB}.Release|Any CPU.Build.0 = Release|Any CPU
+ {7C1B2721-13DA-4B62-B046-C626605ECCE6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {7C1B2721-13DA-4B62-B046-C626605ECCE6}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {7C1B2721-13DA-4B62-B046-C626605ECCE6}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {7C1B2721-13DA-4B62-B046-C626605ECCE6}.Release|Any CPU.Build.0 = Release|Any CPU
+ {BA161CA0-CD65-4E6E-B644-51C8D1E542DC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {BA161CA0-CD65-4E6E-B644-51C8D1E542DC}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {BA161CA0-CD65-4E6E-B644-51C8D1E542DC}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {BA161CA0-CD65-4E6E-B644-51C8D1E542DC}.Release|Any CPU.Build.0 = Release|Any CPU
+ {6AE2A5E8-4C5A-48B9-997B-E1455C0355C6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {6AE2A5E8-4C5A-48B9-997B-E1455C0355C6}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {6AE2A5E8-4C5A-48B9-997B-E1455C0355C6}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {6AE2A5E8-4C5A-48B9-997B-E1455C0355C6}.Release|Any CPU.Build.0 = Release|Any CPU
+ {D4D09B08-D580-4D69-B886-C35D2853F6C8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D4D09B08-D580-4D69-B886-C35D2853F6C8}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D4D09B08-D580-4D69-B886-C35D2853F6C8}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D4D09B08-D580-4D69-B886-C35D2853F6C8}.Release|Any CPU.Build.0 = Release|Any CPU
+ {2BCB3D7A-38C0-4FE7-8FDA-374C6AD56D0E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {2BCB3D7A-38C0-4FE7-8FDA-374C6AD56D0E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {2BCB3D7A-38C0-4FE7-8FDA-374C6AD56D0E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {2BCB3D7A-38C0-4FE7-8FDA-374C6AD56D0E}.Release|Any CPU.Build.0 = Release|Any CPU
+ {77D01AD9-2C98-478E-AE1D-8F7100738FB4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {77D01AD9-2C98-478E-AE1D-8F7100738FB4}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {77D01AD9-2C98-478E-AE1D-8F7100738FB4}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {77D01AD9-2C98-478E-AE1D-8F7100738FB4}.Release|Any CPU.Build.0 = Release|Any CPU
+ {77F96ECE-4952-42DB-A528-DED25572A573}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {77F96ECE-4952-42DB-A528-DED25572A573}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {77F96ECE-4952-42DB-A528-DED25572A573}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {77F96ECE-4952-42DB-A528-DED25572A573}.Release|Any CPU.Build.0 = Release|Any CPU
+ {AF34127A-3A92-43E5-8496-14960A50B1F1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {AF34127A-3A92-43E5-8496-14960A50B1F1}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {AF34127A-3A92-43E5-8496-14960A50B1F1}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {AF34127A-3A92-43E5-8496-14960A50B1F1}.Release|Any CPU.Build.0 = Release|Any CPU
+ {7F55A45D-4E1D-4A36-ADD3-87F29A285AA2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {7F55A45D-4E1D-4A36-ADD3-87F29A285AA2}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {7F55A45D-4E1D-4A36-ADD3-87F29A285AA2}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {7F55A45D-4E1D-4A36-ADD3-87F29A285AA2}.Release|Any CPU.Build.0 = Release|Any CPU
+ {B575BCDE-2FD8-4A5D-8756-31CDD7FE81F0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {B575BCDE-2FD8-4A5D-8756-31CDD7FE81F0}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {B575BCDE-2FD8-4A5D-8756-31CDD7FE81F0}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {B575BCDE-2FD8-4A5D-8756-31CDD7FE81F0}.Release|Any CPU.Build.0 = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {110169B3-3328-4730-8AB0-BA05BEF75C1A}
+ EndGlobalSection
+EndGlobal
diff --git a/Ryujinx.sln.DotSettings b/Ryujinx.sln.DotSettings
new file mode 100644
index 0000000..ed7f3e9
--- /dev/null
+++ b/Ryujinx.sln.DotSettings
@@ -0,0 +1,23 @@
+
+ WARNING
+ WARNING
+ UseExplicitType
+ UseExplicitType
+ <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb"><ExtraRule Prefix="I" Suffix="" Style="AaBb" /></Policy>
+ <Policy><Descriptor Staticness="Any" AccessRightKinds="Any" Description="Types and namespaces"><ElementKinds><Kind Name="NAMESPACE" /><Kind Name="CLASS" /><Kind Name="STRUCT" /><Kind Name="ENUM" /><Kind Name="DELEGATE" /></ElementKinds></Descriptor><Policy Inspect="True" Prefix="" Suffix="" Style="AaBb"><ExtraRule Prefix="I" Suffix="" Style="AaBb" /></Policy></Policy>
+ True
+ True
+ True
+ True
+ True
+ True
+ True
+ True
+ True
+ True
+ True
+ True
+ True
+ True
+ True
+
\ No newline at end of file
diff --git a/Windows Builder.cmd b/Windows Builder.cmd
new file mode 100644
index 0000000..7323984
--- /dev/null
+++ b/Windows Builder.cmd
@@ -0,0 +1,7 @@
+@echo off
+color a
+echo [BUILDER] Building app for Windows...
+dotnet publish -c Release
+echo [BUILDER] Built successfully.
+start .\src\Ryujinx\bin\Release\net8.0
+pause
\ No newline at end of file
diff --git a/crowdin.yml b/crowdin.yml
new file mode 100644
index 0000000..279cd21
--- /dev/null
+++ b/crowdin.yml
@@ -0,0 +1,3 @@
+files:
+ - source: /**/Assets/Locales/en_US.json
+ translation: /**/Assets/Locales/%locale_with_underscore%.json
diff --git a/distribution/legal/THIRDPARTY.md b/distribution/legal/THIRDPARTY.md
new file mode 100644
index 0000000..5caa037
--- /dev/null
+++ b/distribution/legal/THIRDPARTY.md
@@ -0,0 +1,713 @@
+# ffmpeg (LGPLv3)
+
+ See License
+
+ ```
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+ This version of the GNU Lesser General Public License incorporates
+ the terms and conditions of version 3 of the GNU General Public
+ License, supplemented by the additional permissions listed below.
+
+ 0. Additional Definitions.
+
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+ General Public License, and the "GNU GPL" refers to version 3 of the GNU
+ General Public License.
+
+ "The Library" refers to a covered work governed by this License,
+ other than an Application or a Combined Work as defined below.
+
+ An "Application" is any work that makes use of an interface provided
+ by the Library, but which is not otherwise based on the Library.
+ Defining a subclass of a class defined by the Library is deemed a mode
+ of using an interface provided by the Library.
+
+ A "Combined Work" is a work produced by combining or linking an
+ Application with the Library. The particular version of the Library
+ with which the Combined Work was made is also called the "Linked
+ Version".
+
+ The "Minimal Corresponding Source" for a Combined Work means the
+ Corresponding Source for the Combined Work, excluding any source code
+ for portions of the Combined Work that, considered in isolation, are
+ based on the Application, and not on the Linked Version.
+
+ The "Corresponding Application Code" for a Combined Work means the
+ object code and/or source code for the Application, including any data
+ and utility programs needed for reproducing the Combined Work from the
+ Application, but excluding the System Libraries of the Combined Work.
+
+ 1. Exception to Section 3 of the GNU GPL.
+
+ You may convey a covered work under sections 3 and 4 of this License
+ without being bound by section 3 of the GNU GPL.
+
+ 2. Conveying Modified Versions.
+
+ If you modify a copy of the Library, and, in your modifications, a
+ facility refers to a function or data to be supplied by an Application
+ that uses the facility (other than as an argument passed when the
+ facility is invoked), then you may convey a copy of the modified
+ version:
+
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+
+ 3. Object Code Incorporating Material from Library Header Files.
+
+ The object code form of an Application may incorporate material from
+ a header file that is part of the Library. You may convey such object
+ code under terms of your choice, provided that, if the incorporated
+ material is not limited to numerical parameters, data structure
+ layouts and accessors, or small macros, inline functions and templates
+ (ten or fewer lines in length), you do both of the following:
+
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+
+ 4. Combined Works.
+
+ You may convey a Combined Work under terms of your choice that,
+ taken together, effectively do not restrict modification of the
+ portions of the Library contained in the Combined Work and reverse
+ engineering for debugging such modifications, if you also do each of
+ the following:
+
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+
+ d) Do one of the following:
+
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+
+ 5. Combined Libraries.
+
+ You may place library facilities that are a work based on the
+ Library side by side in a single library together with other library
+ facilities that are not Applications and are not covered by this
+ License, and convey such a combined library under terms of your
+ choice, if you do both of the following:
+
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+
+ 6. Revised Versions of the GNU Lesser General Public License.
+
+ The Free Software Foundation may publish revised and/or new versions
+ of the GNU Lesser General Public License from time to time. Such new
+ versions will be similar in spirit to the present version, but may
+ differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+ Library as you received it specifies that a certain numbered version
+ of the GNU Lesser General Public License "or any later version"
+ applies to it, you have the option of following the terms and
+ conditions either of that published version or of any later version
+ published by the Free Software Foundation. If the Library as you
+ received it does not specify a version number of the GNU Lesser
+ General Public License, you may choose any version of the GNU Lesser
+ General Public License ever published by the Free Software Foundation.
+
+ If the Library as you received it specifies that a proxy can decide
+ whether future versions of the GNU Lesser General Public License shall
+ apply, that proxy's public statement of acceptance of any version is
+ permanent authorization for you to choose that version for the
+ Library.
+ ```
+
+
+# libvpx (BSD)
+
+ See License
+
+ ```
+ Copyright (c) 2010, The WebM Project authors. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+ * Neither the name of Google, nor the WebM Project, nor the names
+ of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ```
+
+
+# Atmosphère (MIT)
+
+ See License
+
+ ```
+ MIT License
+
+ Copyright (c) 2018-2020 Atmosphère-NX
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ ```
+
+
+# OpenAL Soft (LGPLv2)
+
+ See License
+
+ ```
+ GNU LIBRARY GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1991 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ [This is the first released version of the library GPL. It is
+ numbered 2 because it goes with version 2 of the ordinary GPL.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+ freedom to share and change it. By contrast, the GNU General Public
+ Licenses are intended to guarantee your freedom to share and change
+ free software--to make sure the software is free for all its users.
+
+ This license, the Library General Public License, applies to some
+ specially designated Free Software Foundation software, and to any
+ other libraries whose authors decide to use it. You can use it for
+ your libraries, too.
+
+ When we speak of free software, we are referring to freedom, not
+ price. Our General Public Licenses are designed to make sure that you
+ have the freedom to distribute copies of free software (and charge for
+ this service if you wish), that you receive source code or can get it
+ if you want it, that you can change the software or use pieces of it
+ in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+ anyone to deny you these rights or to ask you to surrender the rights.
+ These restrictions translate to certain responsibilities for you if
+ you distribute copies of the library, or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+ or for a fee, you must give the recipients all the rights that we gave
+ you. You must make sure that they, too, receive or can get the source
+ code. If you link a program with the library, you must provide
+ complete object files to the recipients so that they can relink them
+ with the library, after making changes to the library and recompiling
+ it. And you must show them these terms so they know their rights.
+
+ Our method of protecting your rights has two steps: (1) copyright
+ the library, and (2) offer you this license which gives you legal
+ permission to copy, distribute and/or modify the library.
+
+ Also, for each distributor's protection, we want to make certain
+ that everyone understands that there is no warranty for this free
+ library. If the library is modified by someone else and passed on, we
+ want its recipients to know that what they have is not the original
+ version, so that any problems introduced by others will not reflect on
+ the original authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+ patents. We wish to avoid the danger that companies distributing free
+ software will individually obtain patent licenses, thus in effect
+ transforming the program into proprietary software. To prevent this,
+ we have made it clear that any patent must be licensed for everyone's
+ free use or not licensed at all.
+
+ Most GNU software, including some libraries, is covered by the ordinary
+ GNU General Public License, which was designed for utility programs. This
+ license, the GNU Library General Public License, applies to certain
+ designated libraries. This license is quite different from the ordinary
+ one; be sure to read it in full, and don't assume that anything in it is
+ the same as in the ordinary license.
+
+ The reason we have a separate public license for some libraries is that
+ they blur the distinction we usually make between modifying or adding to a
+ program and simply using it. Linking a program with a library, without
+ changing the library, is in some sense simply using the library, and is
+ analogous to running a utility program or application program. However, in
+ a textual and legal sense, the linked executable is a combined work, a
+ derivative of the original library, and the ordinary General Public License
+ treats it as such.
+
+ Because of this blurred distinction, using the ordinary General
+ Public License for libraries did not effectively promote software
+ sharing, because most developers did not use the libraries. We
+ concluded that weaker conditions might promote sharing better.
+
+ However, unrestricted linking of non-free programs would deprive the
+ users of those programs of all benefit from the free status of the
+ libraries themselves. This Library General Public License is intended to
+ permit developers of non-free programs to use free libraries, while
+ preserving your freedom as a user of such programs to change the free
+ libraries that are incorporated in them. (We have not seen how to achieve
+ this as regards changes in header files, but we have achieved it as regards
+ changes in the actual functions of the Library.) The hope is that this
+ will lead to faster development of free libraries.
+
+ The precise terms and conditions for copying, distribution and
+ modification follow. Pay close attention to the difference between a
+ "work based on the library" and a "work that uses the library". The
+ former contains code derived from the library, while the latter only
+ works together with the library.
+
+ Note that it is possible for a library to be covered by the ordinary
+ General Public License rather than by this special one.
+
+ GNU LIBRARY GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library which
+ contains a notice placed by the copyright holder or other authorized
+ party saying it may be distributed under the terms of this Library
+ General Public License (also called "this License"). Each licensee is
+ addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+ prepared so as to be conveniently linked with application programs
+ (which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+ which has been distributed under these terms. A "work based on the
+ Library" means either the Library or any derivative work under
+ copyright law: that is to say, a work containing the Library or a
+ portion of it, either verbatim or with modifications and/or translated
+ straightforwardly into another language. (Hereinafter, translation is
+ included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+ making modifications to it. For a library, complete source code means
+ all the source code for all modules it contains, plus any associated
+ interface definition files, plus the scripts used to control compilation
+ and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+ covered by this License; they are outside its scope. The act of
+ running a program using the Library is not restricted, and output from
+ such a program is covered only if its contents constitute a work based
+ on the Library (independent of the use of the Library in a tool for
+ writing it). Whether that is true depends on what the Library does
+ and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+ complete source code as you receive it, in any medium, provided that
+ you conspicuously and appropriately publish on each copy an
+ appropriate copyright notice and disclaimer of warranty; keep intact
+ all the notices that refer to this License and to the absence of any
+ warranty; and distribute a copy of this License along with the
+ Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+ and you may at your option offer warranty protection in exchange for a
+ fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+ of it, thus forming a work based on the Library, and copy and
+ distribute such modifications or work under the terms of Section 1
+ above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+ These requirements apply to the modified work as a whole. If
+ identifiable sections of that work are not derived from the Library,
+ and can be reasonably considered independent and separate works in
+ themselves, then this License, and its terms, do not apply to those
+ sections when you distribute them as separate works. But when you
+ distribute the same sections as part of a whole which is a work based
+ on the Library, the distribution of the whole must be on the terms of
+ this License, whose permissions for other licensees extend to the
+ entire whole, and thus to each and every part regardless of who wrote
+ it.
+
+ Thus, it is not the intent of this section to claim rights or contest
+ your rights to work written entirely by you; rather, the intent is to
+ exercise the right to control the distribution of derivative or
+ collective works based on the Library.
+
+ In addition, mere aggregation of another work not based on the Library
+ with the Library (or with a work based on the Library) on a volume of
+ a storage or distribution medium does not bring the other work under
+ the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+ License instead of this License to a given copy of the Library. To do
+ this, you must alter all the notices that refer to this License, so
+ that they refer to the ordinary GNU General Public License, version 2,
+ instead of to this License. (If a newer version than version 2 of the
+ ordinary GNU General Public License has appeared, then you can specify
+ that version instead if you wish.) Do not make any other change in
+ these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+ that copy, so the ordinary GNU General Public License applies to all
+ subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+ the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+ derivative of it, under Section 2) in object code or executable form
+ under the terms of Sections 1 and 2 above provided that you accompany
+ it with the complete corresponding machine-readable source code, which
+ must be distributed under the terms of Sections 1 and 2 above on a
+ medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+ from a designated place, then offering equivalent access to copy the
+ source code from the same place satisfies the requirement to
+ distribute the source code, even though third parties are not
+ compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+ Library, but is designed to work with the Library by being compiled or
+ linked with it, is called a "work that uses the Library". Such a
+ work, in isolation, is not a derivative work of the Library, and
+ therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+ creates an executable that is a derivative of the Library (because it
+ contains portions of the Library), rather than a "work that uses the
+ library". The executable is therefore covered by this License.
+ Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+ that is part of the Library, the object code for the work may be a
+ derivative work of the Library even though the source code is not.
+ Whether this is true is especially significant if the work can be
+ linked without the Library, or if the work is itself a library. The
+ threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+ structure layouts and accessors, and small macros and small inline
+ functions (ten lines or less in length), then the use of the object
+ file is unrestricted, regardless of whether it is legally a derivative
+ work. (Executables containing this object code plus portions of the
+ Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+ distribute the object code for the work under the terms of Section 6.
+ Any executables containing that work also fall under Section 6,
+ whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also compile or
+ link a "work that uses the Library" with the Library to produce a
+ work containing portions of the Library, and distribute that work
+ under terms of your choice, provided that the terms permit
+ modification of the work for the customer's own use and reverse
+ engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+ Library is used in it and that the Library and its use are covered by
+ this License. You must supply a copy of this License. If the work
+ during execution displays copyright notices, you must include the
+ copyright notice for the Library among them, as well as a reference
+ directing the user to the copy of this License. Also, you must do one
+ of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ c) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ d) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+ Library" must include any data and utility programs needed for
+ reproducing the executable from it. However, as a special exception,
+ the source code distributed need not include anything that is normally
+ distributed (in either source or binary form) with the major
+ components (compiler, kernel, and so on) of the operating system on
+ which the executable runs, unless that component itself accompanies
+ the executable.
+
+ It may happen that this requirement contradicts the license
+ restrictions of other proprietary libraries that do not normally
+ accompany the operating system. Such a contradiction means you cannot
+ use both them and the Library together in an executable that you
+ distribute.
+
+ 7. You may place library facilities that are a work based on the
+ Library side-by-side in a single library together with other library
+ facilities not covered by this License, and distribute such a combined
+ library, provided that the separate distribution of the work based on
+ the Library and of the other library facilities is otherwise
+ permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+ the Library except as expressly provided under this License. Any
+ attempt otherwise to copy, modify, sublicense, link with, or
+ distribute the Library is void, and will automatically terminate your
+ rights under this License. However, parties who have received copies,
+ or rights, from you under this License will not have their licenses
+ terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+ signed it. However, nothing else grants you permission to modify or
+ distribute the Library or its derivative works. These actions are
+ prohibited by law if you do not accept this License. Therefore, by
+ modifying or distributing the Library (or any work based on the
+ Library), you indicate your acceptance of this License to do so, and
+ all its terms and conditions for copying, distributing or modifying
+ the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+ Library), the recipient automatically receives a license from the
+ original licensor to copy, distribute, link with or modify the Library
+ subject to these terms and conditions. You may not impose any further
+ restrictions on the recipients' exercise of the rights granted herein.
+ You are not responsible for enforcing compliance by third parties to
+ this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+ infringement or for any other reason (not limited to patent issues),
+ conditions are imposed on you (whether by court order, agreement or
+ otherwise) that contradict the conditions of this License, they do not
+ excuse you from the conditions of this License. If you cannot
+ distribute so as to satisfy simultaneously your obligations under this
+ License and any other pertinent obligations, then as a consequence you
+ may not distribute the Library at all. For example, if a patent
+ license would not permit royalty-free redistribution of the Library by
+ all those who receive copies directly or indirectly through you, then
+ the only way you could satisfy both it and this License would be to
+ refrain entirely from distribution of the Library.
+
+ If any portion of this section is held invalid or unenforceable under any
+ particular circumstance, the balance of the section is intended to apply,
+ and the section as a whole is intended to apply in other circumstances.
+
+ It is not the purpose of this section to induce you to infringe any
+ patents or other property right claims or to contest validity of any
+ such claims; this section has the sole purpose of protecting the
+ integrity of the free software distribution system which is
+ implemented by public license practices. Many people have made
+ generous contributions to the wide range of software distributed
+ through that system in reliance on consistent application of that
+ system; it is up to the author/donor to decide if he or she is willing
+ to distribute software through any other system and a licensee cannot
+ impose that choice.
+
+ This section is intended to make thoroughly clear what is believed to
+ be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+ certain countries either by patents or by copyrighted interfaces, the
+ original copyright holder who places the Library under this License may add
+ an explicit geographical distribution limitation excluding those countries,
+ so that distribution is permitted only in or among countries not thus
+ excluded. In such case, this License incorporates the limitation as if
+ written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+ versions of the Library General Public License from time to time.
+ Such new versions will be similar in spirit to the present version,
+ but may differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the Library
+ specifies a version number of this License which applies to it and
+ "any later version", you have the option of following the terms and
+ conditions either of that version or of any later version published by
+ the Free Software Foundation. If the Library does not specify a
+ license version number, you may choose any version ever published by
+ the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+ programs whose distribution conditions are incompatible with these,
+ write to the author to ask for permission. For software which is
+ copyrighted by the Free Software Foundation, write to the Free
+ Software Foundation; we sometimes make exceptions for this. Our
+ decision will be guided by the two goals of preserving the free status
+ of all derivatives of our free software and of promoting the sharing
+ and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+ WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+ EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+ OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+ KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+ LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+ THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+ WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+ AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+ FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+ CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+ LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+ RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+ FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+ SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+ ```
+
+
+# ShellLink (MIT)
+
+ See License
+
+ ```
+ MIT License
+
+ Copyright (c) 2017 Yorick Koster, Securify B.V.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ ```
+
diff --git a/distribution/linux/Ryujinx.desktop b/distribution/linux/Ryujinx.desktop
new file mode 100644
index 0000000..44f05bf
--- /dev/null
+++ b/distribution/linux/Ryujinx.desktop
@@ -0,0 +1,14 @@
+[Desktop Entry]
+Version=1.0
+Name=Ryujinx
+Type=Application
+Icon=Ryujinx
+Exec=Ryujinx.sh %f
+Comment=A Nintendo Switch Emulator
+GenericName=Nintendo Switch Emulator
+Terminal=false
+Categories=Game;Emulator;
+MimeType=application/x-nx-nca;application/x-nx-nro;application/x-nx-nso;application/x-nx-nsp;application/x-nx-xci;
+Keywords=Switch;Nintendo;Emulator;
+StartupWMClass=Ryujinx
+PrefersNonDefaultGPU=true
diff --git a/distribution/linux/Ryujinx.sh b/distribution/linux/Ryujinx.sh
new file mode 100644
index 0000000..30eb143
--- /dev/null
+++ b/distribution/linux/Ryujinx.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+SCRIPT_DIR=$(dirname "$(realpath "$0")")
+
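+# Pick the binary to launch; the GUI build takes priority over the headless SDL2 build when both are present.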
+if [ -f "$SCRIPT_DIR/Ryujinx.Headless.SDL2" ]; then
+ RYUJINX_BIN="Ryujinx.Headless.SDL2"
+fi
+
+if [ -f "$SCRIPT_DIR/Ryujinx" ]; then
+ RYUJINX_BIN="Ryujinx"
+fi
+
+if [ -z "$RYUJINX_BIN" ]; then
+ exit 1
+fi
+
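+# DOTNET_EnableAlternateStackCheck=1 lets the .NET runtime handle stack overflow checks on an alternate signal stack.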
+COMMAND="env DOTNET_EnableAlternateStackCheck=1"
+
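+# Wrap the launch with Feral GameMode when it is available.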
+if command -v gamemoderun > /dev/null 2>&1; then
+ COMMAND="$COMMAND gamemoderun"
+fi
+
+exec $COMMAND "$SCRIPT_DIR/$RYUJINX_BIN" "$@"
diff --git a/distribution/linux/mime/Ryujinx.xml b/distribution/linux/mime/Ryujinx.xml
new file mode 100644
index 0000000..bd9df0e
--- /dev/null
+++ b/distribution/linux/mime/Ryujinx.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="utf-8"?>
+<mime-info xmlns="http://www.freedesktop.org/standards/shared-mime-info">
+    <mime-type type="application/x-nx-nca">
+        <comment>Nintendo Content Archive</comment>
+        <acronym>NCA</acronym>
+        <glob pattern="*.nca"/>
+    </mime-type>
+
+    <mime-type type="application/x-nx-nro">
+        <comment>Nintendo Relocatable Object</comment>
+        <acronym>NRO</acronym>
+        <glob pattern="*.nro"/>
+    </mime-type>
+
+    <mime-type type="application/x-nx-nso">
+        <comment>Nintendo Shared Object</comment>
+        <acronym>NSO</acronym>
+        <glob pattern="*.nso"/>
+    </mime-type>
+
+    <mime-type type="application/x-nx-nsp">
+        <comment>Nintendo Submission Package</comment>
+        <acronym>NSP</acronym>
+        <glob pattern="*.nsp"/>
+    </mime-type>
+
+    <mime-type type="application/x-nx-xci">
+        <comment>Nintendo Switch Cartridge</comment>
+        <acronym>XCI</acronym>
+        <glob pattern="*.xci"/>
+    </mime-type>
+</mime-info>
+
diff --git a/distribution/linux/shortcut-template.desktop b/distribution/linux/shortcut-template.desktop
new file mode 100644
index 0000000..6bee0f8
--- /dev/null
+++ b/distribution/linux/shortcut-template.desktop
@@ -0,0 +1,13 @@
+[Desktop Entry]
+Version=1.0
+Name={0}
+Type=Application
+Icon={1}
+Exec={2} %f
+Comment=Nintendo Switch application
+GenericName=Nintendo Switch Emulator
+Terminal=false
+Categories=Game;Emulator;
+Keywords=Switch;Nintendo;Emulator;
+StartupWMClass=Ryujinx
+PrefersNonDefaultGPU=true
diff --git a/distribution/macos/Info.plist b/distribution/macos/Info.plist
new file mode 100644
index 0000000..53929f9
--- /dev/null
+++ b/distribution/macos/Info.plist
@@ -0,0 +1,169 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>CFBundleDevelopmentRegion</key>
+    <string>English</string>
+    <key>CFBundleExecutable</key>
+    <string>Ryujinx</string>
+    <key>CFBundleGetInfoString</key>
+    <string>Ryujinx</string>
+    <key>CFBundleIconFile</key>
+    <string>Ryujinx.icns</string>
+    <key>CFBundleDocumentTypes</key>
+    <array>
+        <dict>
+            <key>CFBundleTypeExtensions</key>
+            <array>
+                <string>nca</string>
+                <string>nro</string>
+                <string>nso</string>
+                <string>nsp</string>
+                <string>xci</string>
+            </array>
+            <key>CFBundleTypeName</key>
+            <string>Nintendo Switch File</string>
+            <key>CFBundleTypeRole</key>
+            <string>Viewer</string>
+            <key>LSHandlerRank</key>
+            <string>Default</string>
+        </dict>
+    </array>
+    <key>CFBundleIdentifier</key>
+    <string>org.ryujinx.Ryujinx</string>
+    <key>CFBundleInfoDictionaryVersion</key>
+    <string>6.0</string>
+    <key>CFBundleLongVersionString</key>
+    <string>%%RYUJINX_BUILD_VERSION%%-%%RYUJINX_BUILD_GIT_HASH%%</string>
+    <key>CFBundleName</key>
+    <string>Ryujinx</string>
+    <key>CFBundlePackageType</key>
+    <string>APPL</string>
+    <key>CFBundleShortVersionString</key>
+    <string>1.1</string>
+    <key>CFBundleSignature</key>
+    <string>????</string>
+    <key>CFBundleVersion</key>
+    <string>1.1.0</string>
+    <key>NSHighResolutionCapable</key>
+    <true/>
+    <key>CSResourcesFileMapped</key>
+    <true/>
+    <key>NSHumanReadableCopyright</key>
+    <string>Copyright © 2018 - 2023 Ryujinx Team and Contributors.</string>
+    <key>LSApplicationCategoryType</key>
+    <string>public.app-category.games</string>
+    <key>LSMinimumSystemVersion</key>
+    <string>12.0</string>
+    <key>UTExportedTypeDeclarations</key>
+    <array>
+        <dict>
+            <key>UTTypeDescription</key>
+            <string>Extensible Application Markup Language</string>
+            <key>UTTypeConformsTo</key>
+            <array>
+                <string>public.xml</string>
+            </array>
+            <key>UTTypeIdentifier</key>
+            <string>com.ryujinx.xaml</string>
+            <key>UTTypeTagSpecification</key>
+            <dict>
+                <key>public.filename-extension</key>
+                <array>
+                    <string>xaml</string>
+                </array>
+            </dict>
+        </dict>
+        <dict>
+            <key>UTTypeDescription</key>
+            <string>Nintendo Submission Package</string>
+            <key>UTTypeConformsTo</key>
+            <array>
+                <string>public.data</string>
+            </array>
+            <key>UTTypeIdentifier</key>
+            <string>com.ryujinx.nsp</string>
+            <key>UTTypeTagSpecification</key>
+            <dict>
+                <key>public.filename-extension</key>
+                <array>
+                    <string>nsp</string>
+                </array>
+            </dict>
+        </dict>
+        <dict>
+            <key>UTTypeDescription</key>
+            <string>Nintendo Switch Cartridge</string>
+            <key>UTTypeConformsTo</key>
+            <array>
+                <string>public.data</string>
+            </array>
+            <key>UTTypeIdentifier</key>
+            <string>com.ryujinx.xci</string>
+            <key>UTTypeTagSpecification</key>
+            <dict>
+                <key>public.filename-extension</key>
+                <array>
+                    <string>xci</string>
+                </array>
+            </dict>
+        </dict>
+        <dict>
+            <key>UTTypeDescription</key>
+            <string>Nintendo Content Archive</string>
+            <key>UTTypeConformsTo</key>
+            <array>
+                <string>public.data</string>
+            </array>
+            <key>UTTypeIdentifier</key>
+            <string>com.ryujinx.nca</string>
+            <key>UTTypeTagSpecification</key>
+            <dict>
+                <key>public.filename-extension</key>
+                <array>
+                    <string>nca</string>
+                </array>
+            </dict>
+        </dict>
+        <dict>
+            <key>UTTypeDescription</key>
+            <string>Nintendo Relocatable Object</string>
+            <key>UTTypeConformsTo</key>
+            <array>
+                <string>public.data</string>
+            </array>
+            <key>UTTypeIdentifier</key>
+            <string>com.ryujinx.nro</string>
+            <key>UTTypeTagSpecification</key>
+            <dict>
+                <key>public.filename-extension</key>
+                <array>
+                    <string>nro</string>
+                </array>
+            </dict>
+        </dict>
+        <dict>
+            <key>UTTypeDescription</key>
+            <string>Nintendo Shared Object</string>
+            <key>UTTypeConformsTo</key>
+            <array>
+                <string>public.data</string>
+            </array>
+            <key>UTTypeIdentifier</key>
+            <string>com.ryujinx.nso</string>
+            <key>UTTypeTagSpecification</key>
+            <dict>
+                <key>public.filename-extension</key>
+                <array>
+                    <string>nso</string>
+                </array>
+            </dict>
+        </dict>
+    </array>
+    <key>LSEnvironment</key>
+    <dict>
+        <key>DOTNET_DefaultStackSize</key>
+        <string>200000</string>
+    </dict>
+</dict>
+</plist>
diff --git a/distribution/macos/Ryujinx.icns b/distribution/macos/Ryujinx.icns
new file mode 100644
index 0000000..f54a9ae
Binary files /dev/null and b/distribution/macos/Ryujinx.icns differ
diff --git a/distribution/macos/bundle_fix_up.py b/distribution/macos/bundle_fix_up.py
new file mode 100644
index 0000000..a8e3ac7
--- /dev/null
+++ b/distribution/macos/bundle_fix_up.py
@@ -0,0 +1,609 @@
+import argparse
+import hashlib
+import os
+from pathlib import Path
+import platform
+import shutil
+import struct
+import subprocess
+from typing import List, Optional, Tuple
+
+parser = argparse.ArgumentParser(description="Fixup for macOS application bundle")
+parser.add_argument("input_directory", help="Input directory (Application path)")
+parser.add_argument("executable_sub_path", help="Main executable sub path")
+
+# Use Apple LLVM on Darwin, otherwise standard LLVM.
+if platform.system() == "Darwin":
+ OTOOL = "otool"
+ INSTALL_NAME_TOOL = "install_name_tool"
+else:
+ OTOOL = shutil.which("llvm-otool")
+ if OTOOL is None:
+ for llvm_ver in [15, 14, 13]:
+ otool_path = shutil.which(f"llvm-otool-{llvm_ver}")
+ if otool_path is not None:
+ OTOOL = otool_path
+ INSTALL_NAME_TOOL = shutil.which(f"llvm-install-name-tool-{llvm_ver}")
+ break
+ else:
+ INSTALL_NAME_TOOL = shutil.which("llvm-install-name-tool")
+
+
+args = parser.parse_args()
+
+
+def get_dylib_id(dylib_path: Path) -> str:
+ res = subprocess.check_output([OTOOL, "-D", str(dylib_path.absolute())]).decode(
+ "utf-8"
+ )
+
+ return res.split("\n")[1]
+
+
+def get_dylib_dependencies(dylib_path: Path) -> List[str]:
+ output = (
+ subprocess.check_output([OTOOL, "-L", str(dylib_path.absolute())])
+ .decode("utf-8")
+ .split("\n")[1:]
+ )
+
+ res = []
+
+ for line in output:
+ line = line.strip()
+ index = line.find(" (compatibility version ")
+ if index == -1:
+ continue
+
+ line = line[:index]
+
+ res.append(line)
+
+ return res
+
+
+def replace_dylib_id(dylib_path: Path, new_id: str):
+ subprocess.check_call(
+ [INSTALL_NAME_TOOL, "-id", new_id, str(dylib_path.absolute())]
+ )
+
+
+def change_dylib_link(dylib_path: Path, old: str, new: str):
+ subprocess.check_call(
+ [INSTALL_NAME_TOOL, "-change", old, new, str(dylib_path.absolute())]
+ )
+
+
+def add_dylib_rpath(dylib_path: Path, rpath: str):
+ subprocess.check_call(
+ [INSTALL_NAME_TOOL, "-add_rpath", rpath, str(dylib_path.absolute())]
+ )
+
+
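+# Rewrite a dylib's install name, and every non-system dependency, to @executable_path-relative
+# paths, resolving each dependency against the given search directories.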
+def fixup_dylib(
+ dylib_path: Path,
+ replacement_path: str,
+ search_path: List[str],
+ content_directory: Path,
+):
+ dylib_id = get_dylib_id(dylib_path)
+ new_dylib_id = replacement_path + "/" + os.path.basename(dylib_id)
+ replace_dylib_id(dylib_path, new_dylib_id)
+
+ dylib_dependencies = get_dylib_dependencies(dylib_path)
+ dylib_new_mapping = {}
+
+ for dylib_dependency in dylib_dependencies:
+ if (
+ not dylib_dependency.startswith("@executable_path")
+ and not dylib_dependency.startswith("/usr/lib")
+ and not dylib_dependency.startswith("/System/Library")
+ ):
+ dylib_dependency_name = os.path.basename(dylib_dependency)
+ library_found = False
+ for library_base_path in search_path:
+ lib_path = Path(os.path.join(library_base_path, dylib_dependency_name))
+
+ if lib_path.exists():
+ target_replacement_path = get_path_related_to_target_exec(
+ content_directory, lib_path
+ )
+
+ dylib_new_mapping[dylib_dependency] = (
+ target_replacement_path
+ + "/"
+ + os.path.basename(dylib_dependency)
+ )
+ library_found = True
+
+ if not library_found:
+ raise Exception(
+ f"{dylib_id}: Cannot find dependency {dylib_dependency_name} for fixup"
+ )
+
+ for key in dylib_new_mapping:
+ change_dylib_link(dylib_path, key, dylib_new_mapping[key])
+
+
+FILE_TYPE_ASSEMBLY = 1
+
+ALIGN_REQUIREMENTS = 4096
+
+
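+# Bundle strings are length-prefixed with a .NET 7-bit encoded integer (one or two bytes are handled here).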
+def parse_embedded_string(data: bytes) -> Tuple[bytes, str]:
+ first_byte = data[0]
+
+ if (first_byte & 0x80) == 0:
+ size = first_byte
+ data = data[1:]
+ else:
+ second_byte = data[1]
+
+ assert (second_byte & 0x80) == 0
+
+ size = (second_byte << 7) | (first_byte & 0x7F)
+
+ data = data[2:]
+
+ res = data[:size].decode("utf-8")
+ data = data[size:]
+
+ return (data, res)
+
+
+def write_embedded_string(file, string: str):
+ raw_str = string.encode("utf-8")
+ raw_str_len = len(raw_str)
+
+ assert raw_str_len < 0x7FFF
+
+ if raw_str_len > 0x7F:
+ file.write(struct.pack("b", raw_str_len & 0x7F | 0x80))
+ file.write(struct.pack("b", raw_str_len >> 7))
+ else:
+ file.write(struct.pack("b", raw_str_len))
+
+ file.write(raw_str)
+
+
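+# A single file inside the .NET single-file bundle; assemblies are padded to 4 KiB alignment on write.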
+class BundleFileEntry(object):
+ offset: int
+ size: int
+ compressed_size: int
+ file_type: int
+ relative_path: str
+ data: bytes
+
+ def __init__(
+ self,
+ offset: int,
+ size: int,
+ compressed_size: int,
+ file_type: int,
+ relative_path: str,
+ data: bytes,
+ ) -> None:
+ self.offset = offset
+ self.size = size
+ self.compressed_size = compressed_size
+ self.file_type = file_type
+ self.relative_path = relative_path
+ self.data = data
+
+ def write(self, file):
+ self.offset = file.tell()
+
+ if (
+ self.file_type == FILE_TYPE_ASSEMBLY
+ and (self.offset % ALIGN_REQUIREMENTS) != 0
+ ):
+ padding_size = ALIGN_REQUIREMENTS - (self.offset % ALIGN_REQUIREMENTS)
+ file.write(b"\0" * padding_size)
+ self.offset += padding_size
+
+ file.write(self.data)
+
+ def write_header(self, file):
+ file.write(
+ struct.pack(
+ "QQQb", self.offset, self.size, self.compressed_size, self.file_type
+ )
+ )
+ write_embedded_string(file, self.relative_path)
+
+
+class BundleManifest(object):
+ major: int
+ minor: int
+ bundle_id: str
+ deps_json: BundleFileEntry
+ runtimeconfig_json: BundleFileEntry
+ flags: int
+ files: List[BundleFileEntry]
+
+ def __init__(
+ self,
+ major: int,
+ minor: int,
+ bundle_id: str,
+ deps_json: BundleFileEntry,
+ runtimeconfig_json: BundleFileEntry,
+ flags: int,
+ files: List[BundleFileEntry],
+ ) -> None:
+ self.major = major
+ self.minor = minor
+ self.bundle_id = bundle_id
+ self.deps_json = deps_json
+ self.runtimeconfig_json = runtimeconfig_json
+ self.flags = flags
+ self.files = files
+
+ def write(self, file) -> int:
+ for bundle_file in self.files:
+ bundle_file.write(file)
+
+ bundle_header_offset = file.tell()
+ file.write(struct.pack("iiI", self.major, self.minor, len(self.files)))
+ write_embedded_string(file, self.bundle_id)
+
+ if self.deps_json is not None:
+ deps_json_location_offset = self.deps_json.offset
+ deps_json_location_size = self.deps_json.size
+ else:
+ deps_json_location_offset = 0
+ deps_json_location_size = 0
+
+ if self.runtimeconfig_json is not None:
+ runtimeconfig_json_location_offset = self.runtimeconfig_json.offset
+ runtimeconfig_json_location_size = self.runtimeconfig_json.size
+ else:
+ runtimeconfig_json_location_offset = 0
+ runtimeconfig_json_location_size = 0
+
+ file.write(
+ struct.pack("qq", deps_json_location_offset, deps_json_location_size)
+ )
+ file.write(
+ struct.pack(
+ "qq",
+ runtimeconfig_json_location_offset,
+ runtimeconfig_json_location_size,
+ )
+ )
+ file.write(struct.pack("q", self.flags))
+
+ for bundle_file in self.files:
+ bundle_file.write_header(file)
+
+ return bundle_header_offset
+
+
+def read_file_entry(
+ raw_data: bytes, header_bytes: bytes
+) -> Tuple[bytes, BundleFileEntry]:
+ (
+ offset,
+ size,
+ compressed_size,
+ file_type,
+ ) = struct.unpack("QQQb", header_bytes[:0x19])
+ (header_bytes, relative_path) = parse_embedded_string(header_bytes[0x19:])
+
+ target_size = compressed_size
+
+ if target_size == 0:
+ target_size = size
+
+ return (
+ header_bytes,
+ BundleFileEntry(
+ offset,
+ size,
+ compressed_size,
+ file_type,
+ relative_path,
+ raw_data[offset : offset + target_size],
+ ),
+ )
+
+
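+# The bundle is located by searching for the SHA-256 of ".net core bundle\n"; the 8 bytes
+# immediately before that marker hold the file offset of the bundle manifest header.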
+def get_dotnet_bundle_data(data: bytes) -> Optional[Tuple[int, int, BundleManifest]]:
+ offset = data.find(hashlib.sha256(b".net core bundle\n").digest())
+
+ if offset == -1:
+ return None
+
+ raw_header_offset = data[offset - 8 : offset]
+ (header_offset,) = struct.unpack("q", raw_header_offset)
+ header_bytes = data[header_offset:]
+
+ (
+ major,
+ minor,
+ files_count,
+ ) = struct.unpack("iiI", header_bytes[:0xC])
+ header_bytes = header_bytes[0xC:]
+
+ (header_bytes, bundle_id) = parse_embedded_string(header_bytes)
+
+ # v2 header
+ (
+ deps_json_location_offset,
+ deps_json_location_size,
+ ) = struct.unpack("qq", header_bytes[:0x10])
+ (
+ runtimeconfig_json_location_offset,
+ runtimeconfig_json_location_size,
+ ) = struct.unpack("qq", header_bytes[0x10:0x20])
+ (flags,) = struct.unpack("q", header_bytes[0x20:0x28])
+ header_bytes = header_bytes[0x28:]
+
+ files = []
+
+ deps_json = None
+ runtimeconfig_json = None
+
+ for _ in range(files_count):
+ (header_bytes, file_entry) = read_file_entry(data, header_bytes)
+
+ files.append(file_entry)
+
+ if file_entry.offset == deps_json_location_offset:
+ deps_json = file_entry
+ elif file_entry.offset == runtimeconfig_json_location_offset:
+ runtimeconfig_json = file_entry
+
+ file_entry = files[0]
+
+ return (
+ file_entry.offset,
+ header_offset,
+ BundleManifest(
+ major, minor, bundle_id, deps_json, runtimeconfig_json, flags, files
+ ),
+ )
+
+
+LC_SYMTAB = 0x2
+LC_SEGMENT_64 = 0x19
+LC_CODE_SIGNATURE = 0x1D
+
+
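+# Grow the Mach-O __LINKEDIT segment and symtab string table to cover the re-appended bundle
+# data, and zero out any existing LC_CODE_SIGNATURE load command and its data.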
+def fixup_linkedit(file, data: bytes, new_size: int):
+ offset = 0
+
+ (
+ macho_magic,
+ macho_cputype,
+ macho_cpusubtype,
+ macho_filetype,
+ macho_ncmds,
+ macho_sizeofcmds,
+ macho_flags,
+ macho_reserved,
+ ) = struct.unpack("IiiIIIII", data[offset : offset + 0x20])
+
+ offset += 0x20
+
+ linkedit_offset = None
+ symtab_offset = None
+ codesign_offset = None
+
+ for _ in range(macho_ncmds):
+ (cmd, cmdsize) = struct.unpack("II", data[offset : offset + 8])
+
+ if cmd == LC_SEGMENT_64:
+ (
+ cmd,
+ cmdsize,
+ segname_raw,
+ vmaddr,
+ vmsize,
+ fileoff,
+ filesize,
+ maxprot,
+ initprot,
+ nsects,
+ flags,
+ ) = struct.unpack("II16sQQQQiiII", data[offset : offset + 72])
+ segname = segname_raw.decode("utf-8").split("\0")[0]
+
+ if segname == "__LINKEDIT":
+ linkedit_offset = offset
+ elif cmd == LC_SYMTAB:
+ symtab_offset = offset
+ elif cmd == LC_CODE_SIGNATURE:
+ codesign_offset = offset
+
+ offset += cmdsize
+
+ assert linkedit_offset is not None and symtab_offset is not None
+
+ # If there is a codesign section, clean it up.
+ if codesign_offset is not None:
+ (
+ codesign_cmd,
+ codesign_cmdsize,
+ codesign_dataoff,
+ codesign_datasize,
+ ) = struct.unpack("IIII", data[codesign_offset : codesign_offset + 16])
+ file.seek(codesign_offset)
+ file.write(b"\0" * codesign_cmdsize)
+
+ macho_ncmds -= 1
+ macho_sizeofcmds -= codesign_cmdsize
+ file.seek(0)
+ file.write(
+ struct.pack(
+ "IiiIIIII",
+ macho_magic,
+ macho_cputype,
+ macho_cpusubtype,
+ macho_filetype,
+ macho_ncmds,
+ macho_sizeofcmds,
+ macho_flags,
+ macho_reserved,
+ )
+ )
+
+ file.seek(codesign_dataoff)
+ file.write(b"\0" * codesign_datasize)
+
+ (
+ symtab_cmd,
+ symtab_cmdsize,
+ symtab_symoff,
+ symtab_nsyms,
+ symtab_stroff,
+ symtab_strsize,
+ ) = struct.unpack("IIIIII", data[symtab_offset : symtab_offset + 24])
+
+ symtab_strsize = new_size - symtab_stroff
+
+ new_symtab = struct.pack(
+ "IIIIII",
+ symtab_cmd,
+ symtab_cmdsize,
+ symtab_symoff,
+ symtab_nsyms,
+ symtab_stroff,
+ symtab_strsize,
+ )
+
+ file.seek(symtab_offset)
+ file.write(new_symtab)
+
+ (
+ linkedit_cmd,
+ linkedit_cmdsize,
+ linkedit_segname_raw,
+ linkedit_vmaddr,
+ linkedit_vmsize,
+ linkedit_fileoff,
+ linkedit_filesize,
+ linkedit_maxprot,
+ linkedit_initprot,
+ linkedit_nsects,
+ linkedit_flags,
+ ) = struct.unpack("II16sQQQQiiII", data[linkedit_offset : linkedit_offset + 72])
+
+ linkedit_filesize = new_size - linkedit_fileoff
+ linkedit_vmsize = linkedit_filesize
+
+ new_linkedit = struct.pack(
+ "II16sQQQQiiII",
+ linkedit_cmd,
+ linkedit_cmdsize,
+ linkedit_segname_raw,
+ linkedit_vmaddr,
+ linkedit_vmsize,
+ linkedit_fileoff,
+ linkedit_filesize,
+ linkedit_maxprot,
+ linkedit_initprot,
+ linkedit_nsects,
+ linkedit_flags,
+ )
+ file.seek(linkedit_offset)
+ file.write(new_linkedit)
+
+
+def write_bundle_data(
+ output,
+ old_bundle_base_offset: int,
+ new_bundle_base_offset: int,
+ bundle: BundleManifest,
+) -> int:
+ # Write bundle data
+ bundle_header_offset = bundle.write(output)
+ total_size = output.tell()
+
+ # Patch the header position
+ offset = file_data.find(hashlib.sha256(b".net core bundle\n").digest())
+ output.seek(offset - 8)
+ output.write(struct.pack("q", bundle_header_offset))
+
+ return total_size - new_bundle_base_offset
+
+
+input_directory: Path = Path(args.input_directory)
+content_directory: Path = Path(os.path.join(args.input_directory, "Contents"))
+executable_path: Path = Path(os.path.join(content_directory, args.executable_sub_path))
+
+
+def get_path_related_to_other_path(a: Path, b: Path) -> str:
+ temp = b
+
+ parts = []
+
+ while temp != a:
+ temp = temp.parent
+ parts.append(temp.name)
+
+ parts.remove(parts[-1])
+ parts.reverse()
+
+ return "/".join(parts)
+
+
+def get_path_related_to_target_exec(input_directory: Path, path: Path):
+ return "@executable_path/../" + get_path_related_to_other_path(
+ input_directory, path
+ )
+
+
+search_path = [
+ Path(os.path.join(content_directory, "Frameworks")),
+ Path(os.path.join(content_directory, "Resources/lib")),
+]
+
+
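+# Fix up every dylib and native .so in the bundle so their references point inside the bundle.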
+for path in content_directory.rglob("**/*.dylib"):
+ current_search_path = [path.parent]
+ current_search_path.extend(search_path)
+
+ fixup_dylib(
+ path,
+ get_path_related_to_target_exec(content_directory, path),
+ current_search_path,
+ content_directory,
+ )
+
+for path in content_directory.rglob("**/*.so"):
+ current_search_path = [path.parent]
+ current_search_path.extend(search_path)
+
+ fixup_dylib(
+ path,
+ get_path_related_to_target_exec(content_directory, path),
+ current_search_path,
+ content_directory,
+ )
+
+
+with open(executable_path, "rb") as input:
+ file_data = input.read()
+
+
+(bundle_base_offset, bundle_header_offset, bundle) = get_dotnet_bundle_data(file_data)
+
+add_dylib_rpath(executable_path, "@executable_path/../Frameworks/")
+
+# Recent "vanilla" version of LLVM (LLVM 13 and upper) seems to really dislike how .NET package its assemblies.
+# As a result, after execution of install_name_tool it will have "fixed" the symtab resulting in a missing .NET bundle...
+# To mitigate that, we check if the bundle offset inside the binary is valid after install_name_tool and readd .NET bundle if not.
+output_file_size = os.stat(executable_path).st_size
+if output_file_size < bundle_header_offset:
+ print("LLVM broke the .NET bundle, readding bundle data...")
+ with open(executable_path, "r+b") as output:
+ file_data = output.read()
+ bundle_data_size = write_bundle_data(
+ output, bundle_base_offset, output_file_size, bundle
+ )
+
+ # Now patch the __LINKEDIT section
+ new_size = output_file_size + bundle_data_size
+ fixup_linkedit(output, file_data, new_size)
diff --git a/distribution/macos/construct_universal_dylib.py b/distribution/macos/construct_universal_dylib.py
new file mode 100644
index 0000000..b6c3770
--- /dev/null
+++ b/distribution/macos/construct_universal_dylib.py
@@ -0,0 +1,95 @@
+import argparse
+import os
+from pathlib import Path
+import platform
+import shutil
+import subprocess
+
+parser = argparse.ArgumentParser(
+ description="Construct Universal dylibs for nuget package"
+)
+parser.add_argument(
+ "arm64_input_directory", help="ARM64 Input directory containing dylibs"
+)
+parser.add_argument(
+ "x86_64_input_directory", help="x86_64 Input directory containing dylibs"
+)
+parser.add_argument("output_directory", help="Output directory")
+parser.add_argument("rglob", help="rglob")
+
+args = parser.parse_args()
+
+# Use Apple LLVM on Darwin, otherwise standard LLVM.
+if platform.system() == "Darwin":
+ LIPO = "lipo"
+else:
+ LIPO = shutil.which("llvm-lipo")
+
+ if LIPO is None:
+ for llvm_ver in [15, 14, 13]:
+ lipo_path = shutil.which(f"llvm-lipo-{llvm_ver}")
+ if lipo_path is not None:
+ LIPO = lipo_path
+ break
+
+if LIPO is None:
+ raise Exception("Cannot find a valid location for LLVM lipo!")
+
+arm64_input_directory: Path = Path(args.arm64_input_directory)
+x86_64_input_directory: Path = Path(args.x86_64_input_directory)
+output_directory: Path = Path(args.output_directory)
+rglob = args.rglob
+
+
+def get_new_name(
+ input_directory: Path, output_directory: str, input_dylib_path: Path
+) -> Path:
+ input_component = str(input_dylib_path).replace(str(input_directory), "")[1:]
+ return Path(os.path.join(output_directory, input_component))
+
+
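+# A dylib is "fat" (universal) when `lipo -info` does not report it as a "Non-fat file".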
+def is_fat_file(dylib_path: Path) -> bool:
+ res = subprocess.check_output([LIPO, "-info", str(dylib_path.absolute())]).decode(
+ "utf-8"
+ )
+
+ return not res.split("\n")[0].startswith("Non-fat file")
+
+
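+# Merge the arm64 and x86_64 slices into a single universal dylib: symlinks are
+# recreated as symlinks, inputs that are already fat (or have no x86_64
+# counterpart) are copied verbatim, and everything else is combined with
+# `lipo -create`.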
+def construct_universal_dylib(
+ arm64_input_dylib_path: Path, x86_64_input_dylib_path: Path, output_dylib_path: Path
+):
+ if output_dylib_path.exists() or output_dylib_path.is_symlink():
+ os.remove(output_dylib_path)
+
+ os.makedirs(output_dylib_path.parent, exist_ok=True)
+
+ if arm64_input_dylib_path.is_symlink():
+ os.symlink(
+ os.path.basename(arm64_input_dylib_path.resolve()), output_dylib_path
+ )
+ else:
+ if is_fat_file(arm64_input_dylib_path) or not x86_64_input_dylib_path.exists():
+ with open(output_dylib_path, "wb") as dst:
+ with open(arm64_input_dylib_path, "rb") as src:
+ dst.write(src.read())
+ else:
+ subprocess.check_call(
+ [
+ LIPO,
+ str(arm64_input_dylib_path.absolute()),
+ str(x86_64_input_dylib_path.absolute()),
+ "-output",
+ str(output_dylib_path.absolute()),
+ "-create",
+ ]
+ )
+
+
+print(f"Processing libraries matching {rglob}")
+for path in arm64_input_directory.rglob(rglob):
+ construct_universal_dylib(
+ path,
+ get_new_name(arm64_input_directory, x86_64_input_directory, path),
+ get_new_name(arm64_input_directory, output_directory, path),
+ )
diff --git a/distribution/macos/create_app_bundle.sh b/distribution/macos/create_app_bundle.sh
new file mode 100644
index 0000000..0fa54ea
--- /dev/null
+++ b/distribution/macos/create_app_bundle.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+set -e
+
+PUBLISH_DIRECTORY=$1
+OUTPUT_DIRECTORY=$2
+ENTITLEMENTS_FILE_PATH=$3
+
+APP_BUNDLE_DIRECTORY="$OUTPUT_DIRECTORY/Ryujinx.app"
+
+rm -rf "$APP_BUNDLE_DIRECTORY"
+mkdir -p "$APP_BUNDLE_DIRECTORY/Contents"
+mkdir "$APP_BUNDLE_DIRECTORY/Contents/Frameworks"
+mkdir "$APP_BUNDLE_DIRECTORY/Contents/MacOS"
+mkdir "$APP_BUNDLE_DIRECTORY/Contents/Resources"
+
+# Copy the executable and ensure it can be executed
+cp "$PUBLISH_DIRECTORY/Ryujinx" "$APP_BUNDLE_DIRECTORY/Contents/MacOS/Ryujinx"
+chmod u+x "$APP_BUNDLE_DIRECTORY/Contents/MacOS/Ryujinx"
+
+# Then all libraries
+cp "$PUBLISH_DIRECTORY"/*.dylib "$APP_BUNDLE_DIRECTORY/Contents/Frameworks"
+
+# Then resources
+cp Info.plist "$APP_BUNDLE_DIRECTORY/Contents"
+cp Ryujinx.icns "$APP_BUNDLE_DIRECTORY/Contents/Resources/Ryujinx.icns"
+cp updater.sh "$APP_BUNDLE_DIRECTORY/Contents/Resources/updater.sh"
+cp -r "$PUBLISH_DIRECTORY/THIRDPARTY.md" "$APP_BUNDLE_DIRECTORY/Contents/Resources"
+
+echo -n "APPL????" > "$APP_BUNDLE_DIRECTORY/Contents/PkgInfo"
+
+# Fixup libraries and executable
+python3 bundle_fix_up.py "$APP_BUNDLE_DIRECTORY" MacOS/Ryujinx
+
+# Now sign it
+if ! [ -x "$(command -v codesign)" ];
+then
+ if ! [ -x "$(command -v rcodesign)" ];
+ then
+ echo "Cannot find rcodesign on your system, please install rcodesign."
+ exit 1
+ fi
+
+ # cargo install apple-codesign
+    echo "Using rcodesign for ad-hoc signing"
+ rcodesign sign --entitlements-xml-path "$ENTITLEMENTS_FILE_PATH" "$APP_BUNDLE_DIRECTORY"
+else
+    echo "Using codesign for ad-hoc signing"
+ codesign --entitlements "$ENTITLEMENTS_FILE_PATH" -f --deep -s - "$APP_BUNDLE_DIRECTORY"
+fi
\ No newline at end of file
diff --git a/distribution/macos/create_macos_build_ava.sh b/distribution/macos/create_macos_build_ava.sh
new file mode 100644
index 0000000..23eafc1
--- /dev/null
+++ b/distribution/macos/create_macos_build_ava.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+set -e
+
+if [ "$#" -lt 7 ]; then
+    echo "usage: $0 <base_dir> <temp_dir> <output_dir> <entitlements_file> <version> <source_revision_id> <configuration> [extra_args]"
+ exit 1
+fi
+
+mkdir -p "$1"
+mkdir -p "$2"
+mkdir -p "$3"
+
+BASE_DIR=$(readlink -f "$1")
+TEMP_DIRECTORY=$(readlink -f "$2")
+OUTPUT_DIRECTORY=$(readlink -f "$3")
+ENTITLEMENTS_FILE_PATH=$(readlink -f "$4")
+VERSION=$5
+SOURCE_REVISION_ID=$6
+CONFIGURATION=$7
+EXTRA_ARGS=$8
+
+if [ "$VERSION" == "1.1.0" ];
+then
+ RELEASE_TAR_FILE_NAME=ryujinx-$CONFIGURATION-$VERSION+$SOURCE_REVISION_ID-macos_universal.app.tar
+else
+ RELEASE_TAR_FILE_NAME=ryujinx-$VERSION-macos_universal.app.tar
+fi
+
+ARM64_APP_BUNDLE="$TEMP_DIRECTORY/output_arm64/Ryujinx.app"
+X64_APP_BUNDLE="$TEMP_DIRECTORY/output_x64/Ryujinx.app"
+UNIVERSAL_APP_BUNDLE="$OUTPUT_DIRECTORY/Ryujinx.app"
+EXECUTABLE_SUB_PATH=Contents/MacOS/Ryujinx
+
+rm -rf "$TEMP_DIRECTORY"
+mkdir -p "$TEMP_DIRECTORY"
+
+DOTNET_COMMON_ARGS=(-p:DebugType=embedded -p:Version="$VERSION" -p:SourceRevisionId="$SOURCE_REVISION_ID" --self-contained true $EXTRA_ARGS)
+
+dotnet restore
+dotnet build -c "$CONFIGURATION" src/Ryujinx
+dotnet publish -c "$CONFIGURATION" -r osx-arm64 -o "$TEMP_DIRECTORY/publish_arm64" "${DOTNET_COMMON_ARGS[@]}" src/Ryujinx
+dotnet publish -c "$CONFIGURATION" -r osx-x64 -o "$TEMP_DIRECTORY/publish_x64" "${DOTNET_COMMON_ARGS[@]}" src/Ryujinx
+
+# Get rid of the support library for ARMeilleure for x64 (that's only for arm64)
+rm -rf "$TEMP_DIRECTORY/publish_x64/libarmeilleure-jitsupport.dylib"
+
+# Get rid of libsoundio from arm64 builds as we don't have an arm64 variant
+# TODO: remove this once done
+rm -rf "$TEMP_DIRECTORY/publish_arm64/libsoundio.dylib"
+
+pushd "$BASE_DIR/distribution/macos"
+./create_app_bundle.sh "$TEMP_DIRECTORY/publish_x64" "$TEMP_DIRECTORY/output_x64" "$ENTITLEMENTS_FILE_PATH"
+./create_app_bundle.sh "$TEMP_DIRECTORY/publish_arm64" "$TEMP_DIRECTORY/output_arm64" "$ENTITLEMENTS_FILE_PATH"
+popd
+
+rm -rf "$UNIVERSAL_APP_BUNDLE"
+mkdir -p "$OUTPUT_DIRECTORY"
+
+# Let's copy one of the two app bundles and remove the executable
+cp -R "$ARM64_APP_BUNDLE" "$UNIVERSAL_APP_BUNDLE"
+rm "$UNIVERSAL_APP_BUNDLE/$EXECUTABLE_SUB_PATH"
+
+# Make its libraries universal
+python3 "$BASE_DIR/distribution/macos/construct_universal_dylib.py" "$ARM64_APP_BUNDLE" "$X64_APP_BUNDLE" "$UNIVERSAL_APP_BUNDLE" "**/*.dylib"
+
+if ! [ -x "$(command -v lipo)" ];
+then
+ if ! [ -x "$(command -v llvm-lipo-14)" ];
+ then
+ LIPO=llvm-lipo
+ else
+ LIPO=llvm-lipo-14
+ fi
+else
+ LIPO=lipo
+fi
+
+# Make the executable universal
+$LIPO "$ARM64_APP_BUNDLE/$EXECUTABLE_SUB_PATH" "$X64_APP_BUNDLE/$EXECUTABLE_SUB_PATH" -output "$UNIVERSAL_APP_BUNDLE/$EXECUTABLE_SUB_PATH" -create
+
+# Patch up the Info.plist to have the appropriate version
+sed -r -i.bck "s/\%\%RYUJINX_BUILD_VERSION\%\%/$VERSION/g;" "$UNIVERSAL_APP_BUNDLE/Contents/Info.plist"
+sed -r -i.bck "s/\%\%RYUJINX_BUILD_GIT_HASH\%\%/$SOURCE_REVISION_ID/g;" "$UNIVERSAL_APP_BUNDLE/Contents/Info.plist"
+rm "$UNIVERSAL_APP_BUNDLE/Contents/Info.plist.bck"
+
+# Now sign it
+if ! [ -x "$(command -v codesign)" ];
+then
+ if ! [ -x "$(command -v rcodesign)" ];
+ then
+ echo "Cannot find rcodesign on your system, please install rcodesign."
+ exit 1
+ fi
+
+    # NOTE: Currently requires https://github.com/indygreg/apple-platform-rs/pull/44 to work on other OSes.
+ # cargo install --git "https://github.com/marysaka/apple-platform-rs" --branch "fix/adhoc-app-bundle" apple-codesign --bin "rcodesign"
+ echo "Using rcodesign for ad-hoc signing"
+ rcodesign sign --entitlements-xml-path "$ENTITLEMENTS_FILE_PATH" "$UNIVERSAL_APP_BUNDLE"
+else
+ echo "Using codesign for ad-hoc signing"
+ codesign --entitlements "$ENTITLEMENTS_FILE_PATH" -f --deep -s - "$UNIVERSAL_APP_BUNDLE"
+fi
+
+echo "Creating archive"
+pushd "$OUTPUT_DIRECTORY"
+tar --exclude "Ryujinx.app/Contents/MacOS/Ryujinx" -cvf "$RELEASE_TAR_FILE_NAME" Ryujinx.app 1> /dev/null
+python3 "$BASE_DIR/distribution/misc/add_tar_exec.py" "$RELEASE_TAR_FILE_NAME" "Ryujinx.app/Contents/MacOS/Ryujinx" "Ryujinx.app/Contents/MacOS/Ryujinx"
+gzip -9 < "$RELEASE_TAR_FILE_NAME" > "$RELEASE_TAR_FILE_NAME.gz"
+rm "$RELEASE_TAR_FILE_NAME"
+
+# Create a legacy update package for Avalonia so old testers are not left behind.
+if [ "$VERSION" != "1.1.0" ];
+then
+    cp "$RELEASE_TAR_FILE_NAME.gz" "test-ava-ryujinx-$VERSION-macos_universal.app.tar.gz"
+fi
+
+popd
+
+echo "Done"
\ No newline at end of file
diff --git a/distribution/macos/create_macos_build_headless.sh b/distribution/macos/create_macos_build_headless.sh
new file mode 100644
index 0000000..a439aef
--- /dev/null
+++ b/distribution/macos/create_macos_build_headless.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+set -e
+
+if [ "$#" -lt 7 ]; then
+    echo "usage: $0 <base_dir> <temp_dir> <output_dir> <entitlements_file> <version> <source_revision_id> <configuration> [extra_args]"
+ exit 1
+fi
+
+mkdir -p "$1"
+mkdir -p "$2"
+mkdir -p "$3"
+
+BASE_DIR=$(readlink -f "$1")
+TEMP_DIRECTORY=$(readlink -f "$2")
+OUTPUT_DIRECTORY=$(readlink -f "$3")
+ENTITLEMENTS_FILE_PATH=$(readlink -f "$4")
+VERSION=$5
+SOURCE_REVISION_ID=$6
+CONFIGURATION=$7
+EXTRA_ARGS=$8
+
+if [ "$VERSION" == "1.1.0" ];
+then
+ RELEASE_TAR_FILE_NAME=sdl2-ryujinx-headless-$CONFIGURATION-$VERSION+$SOURCE_REVISION_ID-macos_universal.tar
+else
+ RELEASE_TAR_FILE_NAME=sdl2-ryujinx-headless-$VERSION-macos_universal.tar
+fi
+
+ARM64_OUTPUT="$TEMP_DIRECTORY/publish_arm64"
+X64_OUTPUT="$TEMP_DIRECTORY/publish_x64"
+UNIVERSAL_OUTPUT="$OUTPUT_DIRECTORY/publish"
+EXECUTABLE_SUB_PATH=Ryujinx.Headless.SDL2
+
+rm -rf "$TEMP_DIRECTORY"
+mkdir -p "$TEMP_DIRECTORY"
+
+DOTNET_COMMON_ARGS=(-p:DebugType=embedded -p:Version="$VERSION" -p:SourceRevisionId="$SOURCE_REVISION_ID" --self-contained true $EXTRA_ARGS)
+
+dotnet restore
+dotnet build -c "$CONFIGURATION" src/Ryujinx.Headless.SDL2
+dotnet publish -c "$CONFIGURATION" -r osx-arm64 -o "$TEMP_DIRECTORY/publish_arm64" "${DOTNET_COMMON_ARGS[@]}" src/Ryujinx.Headless.SDL2
+dotnet publish -c "$CONFIGURATION" -r osx-x64 -o "$TEMP_DIRECTORY/publish_x64" "${DOTNET_COMMON_ARGS[@]}" src/Ryujinx.Headless.SDL2
+
+# Get rid of the support library for ARMeilleure for x64 (that's only for arm64)
+rm -rf "$TEMP_DIRECTORY/publish_x64/libarmeilleure-jitsupport.dylib"
+
+# Get rid of libsoundio from arm64 builds as we don't have an arm64 variant
+# TODO: remove this once done
+rm -rf "$TEMP_DIRECTORY/publish_arm64/libsoundio.dylib"
+
+rm -rf "$OUTPUT_DIRECTORY"
+mkdir -p "$OUTPUT_DIRECTORY"
+
+# Let's copy one of the two different outputs and remove the executable
+cp -R "$ARM64_OUTPUT/" "$UNIVERSAL_OUTPUT"
+rm "$UNIVERSAL_OUTPUT/$EXECUTABLE_SUB_PATH"
+
+# Make its libraries universal
+python3 "$BASE_DIR/distribution/macos/construct_universal_dylib.py" "$ARM64_OUTPUT" "$X64_OUTPUT" "$UNIVERSAL_OUTPUT" "**/*.dylib"
+
+if ! [ -x "$(command -v lipo)" ];
+then
+ if ! [ -x "$(command -v llvm-lipo-14)" ];
+ then
+ LIPO=llvm-lipo
+ else
+ LIPO=llvm-lipo-14
+ fi
+else
+ LIPO=lipo
+fi
+
+# Make the executable universal
+$LIPO "$ARM64_OUTPUT/$EXECUTABLE_SUB_PATH" "$X64_OUTPUT/$EXECUTABLE_SUB_PATH" -output "$UNIVERSAL_OUTPUT/$EXECUTABLE_SUB_PATH" -create
+
+# Now sign it
+if ! [ -x "$(command -v codesign)" ];
+then
+ if ! [ -x "$(command -v rcodesign)" ];
+ then
+ echo "Cannot find rcodesign on your system, please install rcodesign."
+ exit 1
+ fi
+
+    # NOTE: Currently requires https://github.com/indygreg/apple-platform-rs/pull/44 to work on other OSes.
+ # cargo install --git "https://github.com/marysaka/apple-platform-rs" --branch "fix/adhoc-app-bundle" apple-codesign --bin "rcodesign"
+ echo "Using rcodesign for ad-hoc signing"
+ for FILE in "$UNIVERSAL_OUTPUT"/*; do
+ if [[ $(file "$FILE") == *"Mach-O"* ]]; then
+ rcodesign sign --entitlements-xml-path "$ENTITLEMENTS_FILE_PATH" "$FILE"
+ fi
+ done
+else
+ echo "Using codesign for ad-hoc signing"
+ for FILE in "$UNIVERSAL_OUTPUT"/*; do
+ if [[ $(file "$FILE") == *"Mach-O"* ]]; then
+ codesign --entitlements "$ENTITLEMENTS_FILE_PATH" -f --deep -s - "$FILE"
+ fi
+ done
+fi
+
+echo "Creating archive"
+pushd "$OUTPUT_DIRECTORY"
+tar --exclude "publish/Ryujinx.Headless.SDL2" -cvf "$RELEASE_TAR_FILE_NAME" publish 1> /dev/null
+python3 "$BASE_DIR/distribution/misc/add_tar_exec.py" "$RELEASE_TAR_FILE_NAME" "publish/Ryujinx.Headless.SDL2" "publish/Ryujinx.Headless.SDL2"
+gzip -9 < "$RELEASE_TAR_FILE_NAME" > "$RELEASE_TAR_FILE_NAME.gz"
+rm "$RELEASE_TAR_FILE_NAME"
+popd
+
+echo "Done"
\ No newline at end of file
diff --git a/distribution/macos/entitlements.xml b/distribution/macos/entitlements.xml
new file mode 100644
index 0000000..bf31850
--- /dev/null
+++ b/distribution/macos/entitlements.xml
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>com.apple.security.cs.allow-jit</key>
+    <true/>
+    <key>com.apple.security.cs.allow-unsigned-executable-memory</key>
+    <true/>
+    <key>com.apple.security.cs.allow-dyld-environment-variables</key>
+    <true/>
+    <key>com.apple.security.cs.disable-library-validation</key>
+    <true/>
+    <key>com.apple.security.cs.disable-executable-page-protection</key>
+    <true/>
+    <key>com.apple.security.cs.debugger</key>
+    <true/>
+    <key>com.apple.security.get-task-allow</key>
+    <true/>
+    <key>com.apple.security.hypervisor</key>
+    <true/>
+</dict>
+</plist>
diff --git a/distribution/macos/shortcut-launch-script.sh b/distribution/macos/shortcut-launch-script.sh
new file mode 100644
index 0000000..784d780
--- /dev/null
+++ b/distribution/macos/shortcut-launch-script.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
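+# When running under Rosetta 2 translation (sysctl.proc_translated == 1),
+# uname -m reports x86_64 even on Apple silicon, so force arm64 in that case.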
+launch_arch="$(uname -m)"
+if [ "$(sysctl -in sysctl.proc_translated)" = "1" ]
+then
+ launch_arch="arm64"
+fi
+
+arch -$launch_arch {0} {1}
diff --git a/distribution/macos/shortcut-template.plist b/distribution/macos/shortcut-template.plist
new file mode 100644
index 0000000..27a9e46
--- /dev/null
+++ b/distribution/macos/shortcut-template.plist
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>CFBundleDevelopmentRegion</key>
+    <string>English</string>
+    <key>CFBundleExecutable</key>
+    <string>{0}</string>
+    <key>CFBundleGetInfoString</key>
+    <string>{1}</string>
+    <key>CFBundleIconFile</key>
+    <string>{2}</string>
+    <key>CFBundleInfoDictionaryVersion</key>
+    <string>6.0</string>
+    <key>CFBundleVersion</key>
+    <string>1.0</string>
+    <key>NSHighResolutionCapable</key>
+    <true/>
+    <key>CSResourcesFileMapped</key>
+    <true/>
+    <key>NSHumanReadableCopyright</key>
+    <string>Copyright © 2018 - 2023 Ryujinx Team and Contributors.</string>
+    <key>LSApplicationCategoryType</key>
+    <string>public.app-category.games</string>
+    <key>LSMinimumSystemVersion</key>
+    <string>11.0</string>
+    <key>UIPrerenderedIcon</key>
+    <true/>
+    <key>LSEnvironment</key>
+    <dict>
+        <key>DOTNET_DefaultStackSize</key>
+        <string>200000</string>
+    </dict>
+</dict>
+</plist>
diff --git a/distribution/macos/updater.sh b/distribution/macos/updater.sh
new file mode 100644
index 0000000..12e4c3a
--- /dev/null
+++ b/distribution/macos/updater.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+set -e
+
+INSTALL_DIRECTORY=$1
+NEW_APP_DIRECTORY=$2
+APP_PID=$3
+APP_ARGUMENTS=("${@:4}")
+
+error_handler() {
+ local lineno="$1"
+
+ script="""
+ set alertTitle to \"Ryujinx - Updater error\"
+ set alertMessage to \"An error occurred during Ryujinx update (updater.sh:$lineno)\n\nPlease download the update manually from our website if the problem persists.\"
+ display dialog alertMessage with icon caution with title alertTitle buttons {\"Open Download Page\", \"Exit\"}
+ set the button_pressed to the button returned of the result
+
+ if the button_pressed is \"Open Download Page\" then
+ open location \"https://ryujinx.org/download\"
+ end if
+ """
+
+ osascript -e "$script"
+ exit 1
+}
+
+trap 'error_handler ${LINENO}' ERR
+
+# Wait for Ryujinx to exit.
+# If the main process is still active, wait 1 second and check again.
+# After the fifth failed check, this script exits with status 1.
+
+attempt=0
+while true; do
+ if lsof -p "$APP_PID" +r 1 &>/dev/null || ps -p "$APP_PID" &>/dev/null; then
+ if [ "$attempt" -eq 4 ]; then
+ exit 1
+ fi
+ sleep 1
+ else
+ break
+ fi
+    # Plain (( attempt++ )) returns a failing status when attempt is 0,
+    # which would trip the ERR trap under `set -e`.
+    attempt=$((attempt + 1))
+done
+
+sleep 1
+
+# Now replace and reopen.
+rm -rf "$INSTALL_DIRECTORY"
+mv "$NEW_APP_DIRECTORY" "$INSTALL_DIRECTORY"
+
+if [ "$#" -le 3 ]; then
+ open -a "$INSTALL_DIRECTORY"
+else
+ open -a "$INSTALL_DIRECTORY" --args "${APP_ARGUMENTS[@]}"
+fi
\ No newline at end of file
diff --git a/distribution/misc/Logo.svg b/distribution/misc/Logo.svg
new file mode 100644
index 0000000..d6a7631
--- /dev/null
+++ b/distribution/misc/Logo.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/distribution/misc/add_tar_exec.py b/distribution/misc/add_tar_exec.py
new file mode 100644
index 0000000..fe659c1
--- /dev/null
+++ b/distribution/misc/add_tar_exec.py
@@ -0,0 +1,24 @@
+import argparse
+from io import BytesIO
+import tarfile
+
+parser = argparse.ArgumentParser(
+ description="Add the main binary to a tar and force it to be executable"
+)
+parser.add_argument("input_tar_file", help="input tar file")
+parser.add_argument("main_binary_path", help="Main executable path")
+parser.add_argument("main_binary_tar_path", help="Main executable tar path")
+
+args = parser.parse_args()
+input_tar_file = args.input_tar_file
+main_binary_path = args.main_binary_path
+main_binary_tar_path = args.main_binary_tar_path
+
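+# Append the binary with an explicit 0o755 mode so the executable bit is set
+# inside the archive regardless of the permissions on the build filesystem.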
+with open(main_binary_path, "rb") as f:
+ with tarfile.open(input_tar_file, "a") as tar:
+ data = f.read()
+ tar_info = tarfile.TarInfo(main_binary_tar_path)
+ tar_info.mode = 0o755
+ tar_info.size = len(data)
+
+ tar.addfile(tar_info, BytesIO(data))
diff --git a/distribution/windows/alsoft.ini b/distribution/windows/alsoft.ini
new file mode 100644
index 0000000..a142619
--- /dev/null
+++ b/distribution/windows/alsoft.ini
@@ -0,0 +1,2 @@
+[General]
+stereo-mode=speakers
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..a22da3c
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,35 @@
+# Documents Index
+
+This repo includes several documents that explain both high-level and low-level concepts about Ryujinx and its functions. These are very useful for contributors, providing context that can be very difficult to acquire from just reading code.
+
+Intro to Ryujinx
+==================
+
+Ryujinx is an open-source Nintendo Switch emulator, created by gdkchan, written in C#.
+* The CPU emulator, ARMeilleure, emulates an ARMv8 CPU and currently has support for most 64-bit ARMv8 and some of the ARMv7 (and older) instructions.
+* The GPU emulator emulates the Switch's Maxwell GPU using either the OpenGL (version 4.5 minimum), Vulkan, or Metal (via MoltenVK) APIs, through a custom build of OpenTK (for OpenGL) or Silk.NET (for Vulkan and Metal).
+* Audio output is entirely supported via C# wrappers for SDL2, with OpenAL & libsoundio as fallbacks.
+
+Getting Started
+===============
+
+- [Installing the .NET SDK](https://dotnet.microsoft.com/download)
+- [Official .NET Docs](https://docs.microsoft.com/dotnet/core/)
+
+Contributing (Building, testing, benchmarking, profiling, etc.)
+===============
+
+If you want to contribute a code change to this repo, start here.
+
+- [Contributor Guide](../CONTRIBUTING.md)
+
+Coding Guidelines
+=================
+
+- [C# coding style](coding-guidelines/coding-style.md)
+- [Service Implementation Guidelines - WIP](https://gist.github.com/gdkchan/84ba88cd50efbe58d1babfaa7cd7c455)
+
+Project Docs
+=================
+
+To be added. Many project files will contain basic XML docs for key functions and classes in the meantime.
diff --git a/docs/coding-guidelines/coding-style.md b/docs/coding-guidelines/coding-style.md
new file mode 100644
index 0000000..9c84055
--- /dev/null
+++ b/docs/coding-guidelines/coding-style.md
@@ -0,0 +1,116 @@
+# C# Coding Style
+
+The general rule we follow is "use Visual Studio defaults".
+Using an IDE that supports the `.editorconfig` standard will make this much simpler.
+
+1. We use [Allman style](http://en.wikipedia.org/wiki/Indent_style#Allman_style) braces, where each brace begins on a new line. A single line statement block can go without braces but the block must be properly indented on its own line and must not be nested in other statement blocks that use braces (See rule 18 for more details). One exception is that a `using` statement is permitted to be nested within another `using` statement by starting on the following line at the same indentation level, even if the nested `using` contains a controlled block.
+2. We use four spaces of indentation (no tabs).
+3. We use `_camelCase` for internal and private fields and use `readonly` where possible. Prefix internal and private instance fields with `_`, static fields with `s_` and thread static fields with `t_`. When used on static fields, `readonly` should come after `static` (e.g. `static readonly` not `readonly static`). Public fields should be used sparingly and should use PascalCasing with no prefix when used.
+4. We avoid `this.` unless absolutely necessary.
+5. We always specify the visibility, even if it's the default (e.g.
+ `private string _foo` not `string _foo`). Visibility should be the first modifier (e.g.
+ `public abstract` not `abstract public`).
+6. Namespace imports should be specified at the top of the file, *outside* of `namespace` declarations.
+7. Avoid more than one empty line at any time. For example, do not have two
+ blank lines between members of a type.
+8. Avoid spurious free spaces.
+ For example avoid `if (someVar == 0)...`, where the dots mark the spurious free spaces.
+ Consider enabling "View White Space (Ctrl+R, Ctrl+W)" or "Edit -> Advanced -> View White Space" if using Visual Studio to aid detection.
+9. If a file happens to differ in style from these guidelines (e.g. private members are named `m_member`
+ rather than `_member`), the existing style in that file takes precedence.
+10. We only use `var` when the type is explicitly named on the right-hand side, typically due to either `new` or an explicit cast, e.g. `var stream = new FileStream(...)` not `var stream = OpenStandardInput()`.
+ - Similarly, target-typed `new()` can only be used when the type is explicitly named on the left-hand side, in a variable definition statement or a field definition statement. e.g. `FileStream stream = new(...);`, but not `stream = new(...);` (where the type was specified on a previous line).
+11. We use language keywords instead of BCL types (e.g. `int, string, float` instead of `Int32, String, Single`, etc) for both type references as well as method calls (e.g. `int.Parse` instead of `Int32.Parse`). See issue [#13976](https://github.com/dotnet/runtime/issues/13976) for examples.
+12. We use PascalCasing to name all our constant local variables and fields. The only exception is for interop code where the constant value should exactly match the name and value of the code you are calling via interop.
+13. We use PascalCasing for all method names, including local functions.
+14. We use ```nameof(...)``` instead of ```"..."``` whenever possible and relevant.
+15. Fields should be specified at the top within type declarations.
+16. When including non-ASCII characters in the source code use Unicode escape sequences (\uXXXX) instead of literal characters. Literal non-ASCII characters occasionally get garbled by a tool or editor.
+17. When using labels (for goto), indent the label one less than the current indentation.
+18. When using a single-statement if, we follow these conventions (see the short example after this list):
+ - Never use single-line form (for example: `if (source == null) throw new ArgumentNullException("source");`)
+ - Using braces is always accepted, and required if any block of an `if`/`else if`/.../`else` compound statement uses braces or if a single statement body spans multiple lines.
+ - Braces may be omitted only if the body of *every* block associated with an `if`/`else if`/.../`else` compound statement is placed on a single line.
+19. Make all internal and private types static or sealed unless derivation from them is required. As with any implementation detail, they can be changed if/when derivation is required in the future.
+20. XML docs should be used when writing interfaces or when a class/method is deemed sufficient in scope or complexity.
+21. So-called [Magic Numbers](https://en.wikipedia.org/wiki/Magic_number_(programming)) should be defined as named constants before use (for example `for (int i = 56; i < 68; i++)` could read `for (int i = _currentAge; i < _retireAge; i++)`).
+ This may be ignored for trivial or syntactically common statements.
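+
+For instance, a minimal sketch of rule 18's brace conventions (identifiers here are illustrative only):
+
+```C#
+// Allowed: the body of every block is on its own single line.
+if (source == null)
+    throw new ArgumentNullException(nameof(source));
+
+// Braces required: at least one body spans multiple lines.
+if (source == null)
+{
+    LogMissingSource(); // hypothetical helper
+    throw new ArgumentNullException(nameof(source));
+}
+```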
+
+An [EditorConfig](https://editorconfig.org "EditorConfig homepage") file (`.editorconfig`) has been provided at the root of the repository, enabling C# auto-formatting conforming to the above guidelines.
+
+### Example File:
+
+``ShaderCache.cs:``
+
+```C#
+using Ryujinx.Common.Configuration;
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Engine.Threed;
+using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Gpu.Shader.DiskCache;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+    /// <summary>
+    /// Memory cache of shader code.
+    /// </summary>
+ class ShaderCache : IDisposable
+ {
+        /// <summary>
+        /// Default flags used on the shader translation process.
+        /// </summary>
+ public const TranslationFlags DefaultFlags = TranslationFlags.DebugMode;
+
+ private readonly struct TranslatedShader
+ {
+ public readonly CachedShaderStage Shader;
+ public readonly ShaderProgram Program;
+
+ public TranslatedShader(CachedShaderStage shader, ShaderProgram program)
+ {
+ Shader = shader;
+ Program = program;
+ }
+ }
+ ...
+
+        /// <summary>
+        /// Processes the queue of shaders that must save their binaries to the disk cache.
+        /// </summary>
+ public void ProcessShaderCacheQueue()
+ {
+ // Check to see if the binaries for previously compiled shaders are ready, and save them out.
+
+ while (_programsToSaveQueue.TryPeek(out ProgramToSave programToSave))
+ {
+ ProgramLinkStatus result = programToSave.HostProgram.CheckProgramLink(false);
+
+ if (result != ProgramLinkStatus.Incomplete)
+ {
+ if (result == ProgramLinkStatus.Success)
+ {
+ _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.BinaryCode ?? programToSave.HostProgram.GetBinary());
+ }
+
+ _programsToSaveQueue.Dequeue();
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+ }
+}
+```
+
+For other languages, our current best guidance is consistency. When editing files, keep new code and changes consistent with the style in the files. For new files, it should conform to the style for that component. If there is a completely new component, anything that is reasonably broadly accepted is fine.
diff --git a/docs/workflow/pr-guide.md b/docs/workflow/pr-guide.md
new file mode 100644
index 0000000..cc2c590
--- /dev/null
+++ b/docs/workflow/pr-guide.md
@@ -0,0 +1,56 @@
+# Pull Request Guide
+
+## Contributing Rules
+
+All contributions to the Ryujinx/Ryujinx repository are made via pull requests (PRs) rather than through direct commits. Pull requests are reviewed and merged by the maintainers after a review with at least two approvals from the core development team.
+
+To merge pull requests, you must have write permissions in the repository.
+
+## Quick Code Review Rules
+
+* Do not mix unrelated changes in one pull request. For example, a code style change should never be mixed with a bug fix.
+* All changes should follow the existing code style. You can read more about our code style at [docs/coding-guidelines](../coding-guidelines/coding-style.md).
+* Adding external dependencies is to be avoided unless not doing so would introduce _significant_ complexity. Any dependency addition should be justified and discussed before merge.
+* Use Draft pull requests for changes you are still working on but want early CI loop feedback. When you think your changes are ready for review, [change the status](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request) of your pull request.
+* Rebase your changes when required or directly requested. Changes should always be committed on top of the upstream branch, not the other way around.
+* If you are asked to make changes during the review process do them as a new commit.
+* Only resolve GitHub conversations with reviewers once they have been addressed with a commit, or via a mutual agreement.
+
+## Pull Request Ownership
+
+Every pull request will automatically have labels and reviewers assigned. The label not only indicates the code segment the change touches but also which area reviewers will be assigned.
+
+If during the code review process a merge conflict occurs, the PR author is responsible for its resolution. Help will be provided if necessary although GitHub makes this easier by allowing simple conflict resolution using the [conflict-editor](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/resolving-a-merge-conflict-on-github).
+
+## Pull Request Builds
+
+When submitting a PR to the `Ryujinx/Ryujinx` repository, various builds will run validating many areas to ensure we keep developer productivity and product quality high. These various workflows can be tracked in the [Actions](https://github.com/Ryujinx/Ryujinx/actions) tab of the repository. If the job continues to completion, the build artifacts will be uploaded and posted as a comment in the PR discussion.
+
+## Review Turnaround Times
+
+Ryujinx is a project that is maintained by volunteers on a completely free-time basis. As such we cannot guarantee any particular timeframe for pull request review and approval. Weeks to months are common for larger (>500 line) PRs, but there are some additional best practices to avoid review purgatory.
+
+* Make the reviewers' lives easier wherever possible. Make use of descriptive commit names, code comments and XML docs where applicable.
+* If there is disagreement on feedback then always lean on the side of the development team and community over any personal opinion.
+* We're human. We miss things. We forget things. If there has been radio silence on your changes for a substantial period of time, do not hesitate to reach out, either with something simple like "bump" on GitHub or directly on Discord.
+
+To reiterate: make the review as easy for us as possible, respond promptly, and feel free to interact directly with us for anything else.
+
+## Merging Pull Requests
+
+Anyone with write access can merge a pull request manually when the following conditions have been met:
+
+* The PR has been approved by two reviewers and any other objections are addressed.
+ * You can request follow up reviews from the original reviewers if they requested changes.
+* The PR successfully builds and passes all tests in the Continuous Integration (CI) system. In case of failures, refer to the [Actions](https://github.com/Ryujinx/Ryujinx/actions) tab of your PR.
+
+Typically, PRs are merged as one commit (squash merges). It creates a simpler history than a Merge Commit. "Special circumstances" are rare, and typically mean that there are a series of cleanly separated changes that will be too hard to understand if squashed together, or for some reason we want to preserve the ability to dissect them.
+
+## Blocking Pull Request Merging
+
+If for whatever reason you would like to move your pull request back to an in-progress status to avoid merging it in its current form, you can turn the PR into a draft PR by selecting the option under the reviewers section. Alternatively, you can add the [WIP] prefix to the pull request title.
+
+## Old Pull Request Policy
+
+From time to time we will review older PRs and check them for relevance. If we find the PR is inactive or no longer applies, we will close it. As the PR owner, you can simply reopen it if you feel your closed PR needs our attention.
+
diff --git a/global.json b/global.json
new file mode 100644
index 0000000..391ba3c
--- /dev/null
+++ b/global.json
@@ -0,0 +1,6 @@
+{
+ "sdk": {
+ "version": "8.0.100",
+ "rollForward": "latestFeature"
+ }
+}
diff --git a/nuget.config b/nuget.config
new file mode 100644
index 0000000..80f5bd7
--- /dev/null
+++ b/nuget.config
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/src/ARMeilleure/ARMeilleure.csproj b/src/ARMeilleure/ARMeilleure.csproj
new file mode 100644
index 0000000..550e50c
--- /dev/null
+++ b/src/ARMeilleure/ARMeilleure.csproj
@@ -0,0 +1,26 @@
+
+
+
+ net8.0
+ true
+
+
+
+
+
+
+
+
+
+ PreserveNewest
+ libarmeilleure-jitsupport.dylib
+
+
+
+
+
+ <_Parameter1>Ryujinx.Tests
+
+
+
+
diff --git a/src/ARMeilleure/Allocators.cs b/src/ARMeilleure/Allocators.cs
new file mode 100644
index 0000000..fba3026
--- /dev/null
+++ b/src/ARMeilleure/Allocators.cs
@@ -0,0 +1,39 @@
+using ARMeilleure.Common;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure
+{
+ static class Allocators
+ {
+ [ThreadStatic] private static ArenaAllocator _default;
+ [ThreadStatic] private static ArenaAllocator _operands;
+ [ThreadStatic] private static ArenaAllocator _operations;
+ [ThreadStatic] private static ArenaAllocator _references;
+ [ThreadStatic] private static ArenaAllocator _liveRanges;
+ [ThreadStatic] private static ArenaAllocator _liveIntervals;
+
+ public static ArenaAllocator Default => GetAllocator(ref _default, 256 * 1024, 4);
+ public static ArenaAllocator Operands => GetAllocator(ref _operands, 64 * 1024, 8);
+ public static ArenaAllocator Operations => GetAllocator(ref _operations, 64 * 1024, 8);
+ public static ArenaAllocator References => GetAllocator(ref _references, 64 * 1024, 8);
+ public static ArenaAllocator LiveRanges => GetAllocator(ref _liveRanges, 64 * 1024, 8);
+ public static ArenaAllocator LiveIntervals => GetAllocator(ref _liveIntervals, 64 * 1024, 8);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static ArenaAllocator GetAllocator(ref ArenaAllocator alloc, uint pageSize, uint pageCount)
+ {
+ alloc ??= new ArenaAllocator(pageSize, pageCount);
+
+ return alloc;
+ }
+
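+        // Reset the general-purpose arenas between translations; the LiveRanges
+        // and LiveIntervals arenas are managed separately.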
+ public static void ResetAll()
+ {
+ Default.Reset();
+ Operands.Reset();
+ Operations.Reset();
+ References.Reset();
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs b/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
new file mode 100644
index 0000000..00ffd19
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
@@ -0,0 +1,270 @@
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class Arm64Optimizer
+ {
+ private const int MaxConstantUses = 10000;
+
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+            var constants = new Dictionary<ulong, Operand>();
+
+ Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
+ {
+ // If the constant has many uses, we also force a new constant mov to be added, in order
+ // to avoid overflow of the counts field (that is limited to 16 bits).
+ if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses)
+ {
+ constant = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, constant, source);
+
+ block.Operations.AddBefore(operation, copyOp);
+
+ constants[source.Value] = constant;
+ }
+
+ return constant;
+ }
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ constants.Clear();
+
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ // Insert copies for constants that can't fit on a 32-bit immediate.
+ // Doing this early unblocks a few optimizations.
+ if (node.Instruction == Instruction.Add)
+ {
+ Operand src1 = node.GetSource(0);
+ Operand src2 = node.GetSource(1);
+
+ if (src1.Kind == OperandKind.Constant && (src1.Relocatable || ConstTooLong(src1, OperandType.I32)))
+ {
+ node.SetSource(0, GetConstantCopy(block, node, src1));
+ }
+
+ if (src2.Kind == OperandKind.Constant && (src2.Relocatable || ConstTooLong(src2, OperandType.I32)))
+ {
+ node.SetSource(1, GetConstantCopy(block, node, src2));
+ }
+ }
+
+ // Try to fold something like:
+ // lsl x1, x1, #2
+ // add x0, x0, x1
+ // ldr x0, [x0]
+ // add x2, x2, #16
+ // ldr x2, [x2]
+ // Into:
+ // ldr x0, [x0, x1, lsl #2]
+ // ldr x2, [x2, #16]
+ if (IsMemoryLoadOrStore(node.Instruction))
+ {
+ OperandType type;
+
+ if (node.Destination != default)
+ {
+ type = node.Destination.Type;
+ }
+ else
+ {
+ type = node.GetSource(1).Type;
+ }
+
+ Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);
+
+ if (memOp != default)
+ {
+ node.SetSource(0, memOp);
+ }
+ }
+ }
+ }
+
+ Optimizer.RemoveUnusedNodes(cfg);
+ }
+
+ private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
+ {
+ Operand baseOp = addr;
+
+ // First we check if the address is the result of a local X with immediate
+ // addition. If that is the case, then the baseOp is X, and the memory operand immediate
+ // becomes the addition immediate. Otherwise baseOp keeps being the address.
+ int imm = GetConstOp(ref baseOp, type);
+ if (imm != 0)
+ {
+ return MemoryOp(type, baseOp, default, Multiplier.x1, imm);
+ }
+
+ // Now we check if the baseOp is the result of a local Y with a local Z addition.
+ // If that is the case, we now set baseOp to Y and indexOp to Z. We further check
+ // if Z is the result of a left shift of local W by a value == 0 or == Log2(AccessSize),
+ // if that is the case, we set indexOp to W and adjust the scale value of the memory operand
+ // to match that of the left shift.
+ // There is one missed case, which is the address being a shift result, but this is
+ // probably not worth optimizing as it should never happen.
+ (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp, type);
+
+ // If baseOp is still equal to address, then there's nothing that can be optimized.
+ if (baseOp == addr)
+ {
+ return default;
+ }
+
+ return MemoryOp(type, baseOp, indexOp, scale, 0);
+ }
+
+ private static int GetConstOp(ref Operand baseOp, OperandType accessType)
+ {
+ Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (operation == default)
+ {
+ return 0;
+ }
+
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ Operand constOp;
+ Operand otherOp;
+
+ if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable)
+ {
+ constOp = src1;
+ otherOp = src2;
+ }
+ else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant)
+ {
+ constOp = src2;
+ otherOp = src1;
+ }
+ else
+ {
+ return 0;
+ }
+
+ // If we have addition by a constant that we can't encode on the instruction,
+ // then we can't optimize it further.
+ if (ConstTooLong(constOp, accessType))
+ {
+ return 0;
+ }
+
+ baseOp = otherOp;
+
+ return constOp.AsInt32();
+ }
+
+ private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType)
+ {
+ Operand indexOp = default;
+
+ Multiplier scale = Multiplier.x1;
+
+ Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (addOp == default)
+ {
+ return (indexOp, scale);
+ }
+
+ Operand src1 = addOp.GetSource(0);
+ Operand src2 = addOp.GetSource(1);
+
+ if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable)
+ {
+ return (indexOp, scale);
+ }
+
+ baseOp = src1;
+ indexOp = src2;
+
+ Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft);
+
+ bool indexOnSrc2 = false;
+
+ if (shlOp == default)
+ {
+ shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft);
+
+ indexOnSrc2 = true;
+ }
+
+ if (shlOp != default)
+ {
+ Operand shSrc = shlOp.GetSource(0);
+ Operand shift = shlOp.GetSource(1);
+
+ int maxShift = Assembler.GetScaleForType(accessType);
+
+ if (shSrc.Kind == OperandKind.LocalVariable &&
+ shift.Kind == OperandKind.Constant &&
+ (shift.Value == 0 || shift.Value == (ulong)maxShift))
+ {
+ scale = shift.Value switch
+ {
+ 1 => Multiplier.x2,
+ 2 => Multiplier.x4,
+ 3 => Multiplier.x8,
+ 4 => Multiplier.x16,
+ _ => Multiplier.x1,
+ };
+
+ baseOp = indexOnSrc2 ? src1 : src2;
+ indexOp = shSrc;
+ }
+ }
+
+ return (indexOp, scale);
+ }
+
+ private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
+ {
+ // If we have multiple assignments, folding is not safe
+ // as the value may be different depending on the
+ // control flow path.
+ if (op.AssignmentsCount != 1)
+ {
+ return default;
+ }
+
+ Operation asgOp = op.Assignments[0];
+
+ if (asgOp.Instruction != inst)
+ {
+ return default;
+ }
+
+ return asgOp;
+ }
+
+ private static bool IsMemoryLoadOrStore(Instruction inst)
+ {
+ return inst == Instruction.Load || inst == Instruction.Store;
+ }
+
+ private static bool ConstTooLong(Operand constOp, OperandType accessType)
+ {
+ if ((uint)constOp.Value != constOp.Value)
+ {
+ return true;
+ }
+
+ return !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType);
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs b/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
new file mode 100644
index 0000000..5db8985
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
@@ -0,0 +1,49 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
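+    // AArch64 condition-code encodings; the *Un members are the unsigned
+    // comparisons and correspond to the architectural CS/CC/HI/LS conditions.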
+ enum ArmCondition
+ {
+ Eq = 0,
+ Ne = 1,
+ GeUn = 2,
+ LtUn = 3,
+ Mi = 4,
+ Pl = 5,
+ Vs = 6,
+ Vc = 7,
+ GtUn = 8,
+ LeUn = 9,
+ Ge = 10,
+ Lt = 11,
+ Gt = 12,
+ Le = 13,
+ Al = 14,
+ Nv = 15,
+ }
+
+ static class ComparisonArm64Extensions
+ {
+ public static ArmCondition ToArmCondition(this Comparison comp)
+ {
+ return comp switch
+ {
+#pragma warning disable IDE0055 // Disable formatting
+ Comparison.Equal => ArmCondition.Eq,
+ Comparison.NotEqual => ArmCondition.Ne,
+ Comparison.Greater => ArmCondition.Gt,
+ Comparison.LessOrEqual => ArmCondition.Le,
+ Comparison.GreaterUI => ArmCondition.GtUn,
+ Comparison.LessOrEqualUI => ArmCondition.LeUn,
+ Comparison.GreaterOrEqual => ArmCondition.Ge,
+ Comparison.Less => ArmCondition.Lt,
+ Comparison.GreaterOrEqualUI => ArmCondition.GeUn,
+ Comparison.LessUI => ArmCondition.LtUn,
+#pragma warning restore IDE0055
+
+ _ => throw new ArgumentException(null, nameof(comp)),
+ };
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs b/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
new file mode 100644
index 0000000..20ccfd4
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+ enum ArmExtensionType
+ {
+ Uxtb = 0,
+ Uxth = 1,
+ Uxtw = 2,
+ Uxtx = 3,
+ Sxtb = 4,
+ Sxth = 5,
+ Sxtw = 6,
+ Sxtx = 7,
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs b/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
new file mode 100644
index 0000000..f32407c
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
@@ -0,0 +1,11 @@
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ enum ArmShiftType
+ {
+ Lsl = 0,
+ Lsr = 1,
+ Asr = 2,
+ Ror = 3,
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/Assembler.cs b/src/ARMeilleure/CodeGen/Arm64/Assembler.cs
new file mode 100644
index 0000000..41684fa
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/Assembler.cs
@@ -0,0 +1,1162 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+using System.IO;
+using static ARMeilleure.IntermediateRepresentation.Operand;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ class Assembler
+ {
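+        // Bit 31 ("sf") selects the 64-bit variant of most integer instructions;
+        // when clear, the 32-bit variant is encoded instead.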
+ public const uint SfFlag = 1u << 31;
+
+ private const int SpRegister = 31;
+ private const int ZrRegister = 31;
+
+ private readonly Stream _stream;
+
+ public Assembler(Stream stream)
+ {
+ _stream = stream;
+ }
+
+ public void Add(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x0b200000u, rd, rn, rm, extensionType, shiftAmount);
+ }
+
+ public void Add(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0, bool immForm = false)
+ {
+ WriteInstructionAuto(0x11000000u, 0x0b000000u, rd, rn, rm, shiftType, shiftAmount, immForm);
+ }
+
+ public void And(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x12000000u, 0x0a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Ands(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x72000000u, 0x6a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Asr(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
+ Sbfm(rd, rn, shift, mask);
+ }
+ else
+ {
+ Asrv(rd, rn, rm);
+ }
+ }
+
+ public void Asrv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02800u, rd, rn, rm);
+ }
+
+ public void B(int imm)
+ {
+ WriteUInt32(0x14000000u | EncodeSImm26_2(imm));
+ }
+
+ public void B(ArmCondition condition, int imm)
+ {
+ WriteUInt32(0x54000000u | (uint)condition | (EncodeSImm19_2(imm) << 5));
+ }
+
+ public void Blr(Operand rn)
+ {
+ WriteUInt32(0xd63f0000u | (EncodeReg(rn) << 5));
+ }
+
+ public void Br(Operand rn)
+ {
+ WriteUInt32(0xd61f0000u | (EncodeReg(rn) << 5));
+ }
+
+ public void Brk()
+ {
+ WriteUInt32(0xd4200000u);
+ }
+
+ public void Cbz(Operand rt, int imm)
+ {
+ WriteInstructionAuto(0x34000000u | (EncodeSImm19_2(imm) << 5), rt);
+ }
+
+ public void Cbnz(Operand rt, int imm)
+ {
+ WriteInstructionAuto(0x35000000u | (EncodeSImm19_2(imm) << 5), rt);
+ }
+
+ public void Clrex(int crm = 15)
+ {
+ WriteUInt32(0xd503305fu | (EncodeUImm4(crm) << 8));
+ }
+
+ public void Clz(Operand rd, Operand rn)
+ {
+ WriteInstructionAuto(0x5ac01000u, rd, rn);
+ }
+
+ public void CmeqVector(Operand rd, Operand rn, Operand rm, int size, bool q = true)
+ {
+ Debug.Assert((uint)size < 4);
+ WriteSimdInstruction(0x2e208c00u | ((uint)size << 22), rd, rn, rm, q);
+ }
+
+ public void Cmp(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Subs(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Csel(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+ {
+ WriteInstructionBitwiseAuto(0x1a800000u | ((uint)condition << 12), rd, rn, rm);
+ }
+
+ public void Cset(Operand rd, ArmCondition condition)
+ {
+ var zr = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type);
+ Csinc(rd, zr, zr, (ArmCondition)((int)condition ^ 1));
+ }
+
+ public void Csinc(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+ {
+ WriteInstructionBitwiseAuto(0x1a800400u | ((uint)condition << 12), rd, rn, rm);
+ }
+
+ public void Dmb(uint option)
+ {
+ WriteUInt32(0xd50330bfu | (option << 8));
+ }
+
+ public void DupScalar(Operand rd, Operand rn, int index, int size)
+ {
+ WriteInstruction(0x5e000400u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn);
+ }
+
+ public void Eor(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x52000000u, 0x4a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void EorVector(Operand rd, Operand rn, Operand rm, bool q = true)
+ {
+ WriteSimdInstruction(0x2e201c00u, rd, rn, rm, q);
+ }
+
+ public void Extr(Operand rd, Operand rn, Operand rm, int imms)
+ {
+ uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+ WriteInstructionBitwiseAuto(0x13800000u | n | (EncodeUImm6(imms) << 10), rd, rn, rm);
+ }
+
+ public void FaddScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e202800u, rd, rn, rm);
+ }
+
+ public void FcvtScalar(Operand rd, Operand rn)
+ {
+ uint instruction = 0x1e224000u | (rd.Type == OperandType.FP64 ? 1u << 15 : 1u << 22);
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ public void FdivScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e201800u, rd, rn, rm);
+ }
+
+ public void Fmov(Operand rd, Operand rn)
+ {
+ WriteFPInstructionAuto(0x1e204000u, rd, rn);
+ }
+
+ public void Fmov(Operand rd, Operand rn, bool topHalf)
+ {
+ Debug.Assert(rd.Type.IsInteger() != rn.Type.IsInteger());
+ Debug.Assert(rd.Type == OperandType.I64 || rn.Type == OperandType.I64 || !topHalf);
+
+ uint opcode = rd.Type.IsInteger() ? 0b110u : 0b111u;
+
+ uint rmode = topHalf ? 1u << 19 : 0u;
+ uint ftype = rd.Type == OperandType.FP64 || rn.Type == OperandType.FP64 ? 1u << 22 : 0u;
+ uint sf = rd.Type == OperandType.I64 || rn.Type == OperandType.I64 ? SfFlag : 0u;
+
+ WriteUInt32(0x1e260000u | (opcode << 16) | rmode | ftype | sf | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ public void FmulScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e200800u, rd, rn, rm);
+ }
+
+ public void FnegScalar(Operand rd, Operand rn)
+ {
+ WriteFPInstructionAuto(0x1e214000u, rd, rn);
+ }
+
+ public void FsubScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e203800u, rd, rn, rm);
+ }
+
+ public void Ins(Operand rd, Operand rn, int index, int size)
+ {
+ WriteInstruction(0x4e001c00u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn);
+ }
+
+ public void Ins(Operand rd, Operand rn, int srcIndex, int dstIndex, int size)
+ {
+ uint imm4 = (uint)srcIndex << size;
+ Debug.Assert((uint)srcIndex < (16u >> size));
+ WriteInstruction(0x6e000400u | (imm4 << 11) | (EncodeIndexSizeImm5(dstIndex, size) << 16), rd, rn);
+ }
+
+ public void Ldaxp(Operand rt, Operand rt2, Operand rn)
+ {
+ WriteInstruction(0x887f8000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rt2);
+ }
+
+ public void Ldaxr(Operand rt, Operand rn)
+ {
+ WriteInstruction(0x085ffc00u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn);
+ }
+
+ public void Ldaxrb(Operand rt, Operand rn)
+ {
+ WriteInstruction(0x085ffc00u, rt, rn);
+ }
+
+ public void Ldaxrh(Operand rt, Operand rn)
+ {
+ WriteInstruction(0x085ffc00u | (1u << 30), rt, rn);
+ }
+
+ public void LdpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x28c00000u, 0x2cc00000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void LdpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29c00000u, 0x2dc00000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void LdpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29400000u, 0x2d400000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
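+        // Load with automatic addressing-mode selection: a register-offset form
+        // (optionally scaled) when the memory operand has an index, otherwise an
+        // unsigned scaled-immediate offset form.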
+ public void Ldr(Operand rt, Operand rn)
+ {
+ if (rn.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = rn.GetMemory();
+
+ if (memOp.Index != default)
+ {
+ Debug.Assert(memOp.Displacement == 0);
+ Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type));
+ LdrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1);
+ }
+ else
+ {
+ LdrRiUn(rt, memOp.BaseAddress, memOp.Displacement);
+ }
+ }
+ else
+ {
+ LdrRiUn(rt, rn, 0);
+ }
+ }
+
+ public void LdrLit(Operand rt, int offset)
+ {
+ uint instruction = 0x18000000u | (EncodeSImm19_2(offset) << 5);
+
+ if (rt.Type == OperandType.I64)
+ {
+ instruction |= 1u << 30;
+ }
+
+ WriteInstruction(instruction, rt);
+ }
+
+ public void LdrRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8400400u, 0x3c400400u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8400c00u, 0x3c400c00u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb9400000u, 0x3d400000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8600800u, 0x3ce00800u, rt.Type);
+ WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift);
+ }
+
+ public void LdrbRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38400400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrbRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38400c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrbRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x39400000u | (EncodeUImm12(imm, 0) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrhRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78400400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrhRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78400c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrhRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x79400000u | (EncodeUImm12(imm, 1) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Ldur(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8400000u, 0x3c400000u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Lsl(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
+ Ubfm(rd, rn, -shift & mask, mask - shift);
+ }
+ else
+ {
+ Lslv(rd, rn, rm);
+ }
+ }
+
+ public void Lslv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02000u, rd, rn, rm);
+ }
+
+ public void Lsr(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
+ Ubfm(rd, rn, shift, mask);
+ }
+ else
+ {
+ Lsrv(rd, rn, rm);
+ }
+ }
+
+ public void Lsrv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02400u, rd, rn, rm);
+ }
+
+ public void Madd(Operand rd, Operand rn, Operand rm, Operand ra)
+ {
+ WriteInstructionAuto(0x1b000000u, rd, rn, rm, ra);
+ }
+
+ public void Mul(Operand rd, Operand rn, Operand rm)
+ {
+ Madd(rd, rn, rm, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type));
+ }
+
+ public void Mov(Operand rd, Operand rn)
+ {
+ if (rd.Type.IsInteger())
+ {
+ Orr(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn);
+ }
+ else
+ {
+ OrrVector(rd, rn, rn);
+ }
+ }
+
+ public void MovSp(Operand rd, Operand rn)
+ {
+ if (rd.GetRegister().Index == SpRegister ||
+ rn.GetRegister().Index == SpRegister)
+ {
+ Add(rd, rn, Factory.Const(rd.Type, 0), immForm: true);
+ }
+ else
+ {
+ Mov(rd, rn);
+ }
+ }
+
+ public void Mov(Operand rd, int imm)
+ {
+ Movz(rd, imm, 0);
+ }
+
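+        // For Movz/Movk, `hw` selects which 16-bit chunk of the destination the
+        // immediate targets (0-1 for 32-bit registers, 0-3 for 64-bit registers).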
+ public void Movz(Operand rd, int imm, int hw)
+ {
+ Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+ WriteInstructionAuto(0x52800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd);
+ }
+
+ public void Movk(Operand rd, int imm, int hw)
+ {
+ Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+ WriteInstructionAuto(0x72800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd);
+ }
+
+ public void Mrs(Operand rt, uint o0, uint op1, uint crn, uint crm, uint op2)
+ {
+ uint instruction = 0xd5300000u;
+
+ instruction |= (op2 & 7) << 5;
+ instruction |= (crm & 15) << 8;
+ instruction |= (crn & 15) << 12;
+ instruction |= (op1 & 7) << 16;
+ instruction |= (o0 & 1) << 19;
+
+ WriteInstruction(instruction, rt);
+ }
+
+ public void Mvn(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Orn(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount);
+ }
+
+ public void Neg(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Sub(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount);
+ }
+
+ public void Orn(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x2a200000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Orr(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x32000000u, 0x2a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void OrrVector(Operand rd, Operand rn, Operand rm, bool q = true)
+ {
+ WriteSimdInstruction(0x0ea01c00u, rd, rn, rm, q);
+ }
+
+ public void Ret(Operand rn)
+ {
+ WriteUInt32(0xd65f0000u | (EncodeReg(rn) << 5));
+ }
+
+ public void Rev(Operand rd, Operand rn)
+ {
+ uint opc0 = rd.Type == OperandType.I64 ? 1u << 10 : 0u;
+ WriteInstructionAuto(0x5ac00800u | opc0, rd, rn);
+ }
+
+ public void Ror(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
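+ // ROR #s is an alias of EXTR rd, rn, rn, #s.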
+ Extr(rd, rn, rn, shift);
+ }
+ else
+ {
+ Rorv(rd, rn, rm);
+ }
+ }
+
+ public void Rorv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02c00u, rd, rn, rm);
+ }
+
+ public void Sbfm(Operand rd, Operand rn, int immr, int imms)
+ {
+ uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+ WriteInstructionAuto(0x13000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn);
+ }
+
+ public void ScvtfScalar(Operand rd, Operand rn)
+ {
+ uint instruction = 0x1e220000u;
+
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteFPInstructionAuto(instruction, rd, rn);
+ }
+
+ public void Sdiv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16Auto(0x1ac00c00u, rd, rn, rm);
+ }
+
+ public void Smulh(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16(0x9b407c00u, rd, rn, rm);
+ }
+
+ public void Stlxp(Operand rt, Operand rt2, Operand rn, Operand rs)
+ {
+ WriteInstruction(0x88208000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs, rt2);
+ }
+
+ public void Stlxr(Operand rt, Operand rn, Operand rs)
+ {
+ WriteInstructionRm16(0x0800fc00u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs);
+ }
+
+ public void Stlxrb(Operand rt, Operand rn, Operand rs)
+ {
+ WriteInstructionRm16(0x0800fc00u, rt, rn, rs);
+ }
+
+ public void Stlxrh(Operand rt, Operand rn, Operand rs)
+ {
+ WriteInstructionRm16(0x0800fc00u | (1u << 30), rt, rn, rs);
+ }
+
+ public void StpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x28800000u, 0x2c800000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void StpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29800000u, 0x2d800000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void StpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29000000u, 0x2d000000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void Str(Operand rt, Operand rn)
+ {
+ if (rn.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = rn.GetMemory();
+
+ if (memOp.Index != default)
+ {
+ Debug.Assert(memOp.Displacement == 0);
+ Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type));
+ StrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1);
+ }
+ else
+ {
+ StrRiUn(rt, memOp.BaseAddress, memOp.Displacement);
+ }
+ }
+ else
+ {
+ StrRiUn(rt, rn, 0);
+ }
+ }
+
+ public void StrRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8000400u, 0x3c000400u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8000c00u, 0x3c000c00u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb9000000u, 0x3d000000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8200800u, 0x3ca00800u, rt.Type);
+ WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift);
+ }
+
+ public void StrbRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38000400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrbRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38000c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrbRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x39000000u | (EncodeUImm12(imm, 0) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrhRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78000400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrhRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78000c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrhRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x79000000u | (EncodeUImm12(imm, 1) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Stur(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8000000u, 0x3c000000u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Sub(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x4b200000u, rd, rn, rm, extensionType, shiftAmount);
+ }
+
+ public void Sub(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x51000000u, 0x4b000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Subs(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x71000000u, 0x6b000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Sxtb(Operand rd, Operand rn)
+ {
+ Sbfm(rd, rn, 0, 7);
+ }
+
+ public void Sxth(Operand rd, Operand rn)
+ {
+ Sbfm(rd, rn, 0, 15);
+ }
+
+ public void Sxtw(Operand rd, Operand rn)
+ {
+ Sbfm(rd, rn, 0, 31);
+ }
+
+ public void Tst(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Ands(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Ubfm(Operand rd, Operand rn, int immr, int imms)
+ {
+ uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+ WriteInstructionAuto(0x53000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn);
+ }
+
+ public void UcvtfScalar(Operand rd, Operand rn)
+ {
+ uint instruction = 0x1e230000u;
+
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteFPInstructionAuto(instruction, rd, rn);
+ }
+
+ public void Udiv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16Auto(0x1ac00800u, rd, rn, rm);
+ }
+
+ public void Umov(Operand rd, Operand rn, int index, int size)
+ {
+ uint q = size == 3 ? 1u << 30 : 0u;
+ WriteInstruction(0x0e003c00u | (EncodeIndexSizeImm5(index, size) << 16) | q, rd, rn);
+ }
+
+ public void Umulh(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16(0x9bc07c00u, rd, rn, rm);
+ }
+
+ public void Uxtb(Operand rd, Operand rn)
+ {
+ Ubfm(rd, rn, 0, 7);
+ }
+
+ public void Uxth(Operand rd, Operand rn)
+ {
+ Ubfm(rd, rn, 0, 15);
+ }
+
+ private void WriteInstructionAuto(
+ uint instI,
+ uint instR,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmShiftType shiftType = ArmShiftType.Lsl,
+ int shiftAmount = 0,
+ bool immForm = false)
+ {
+ if (rm.Kind == OperandKind.Constant && (rm.Value != 0 || immForm))
+ {
+ Debug.Assert(shiftAmount == 0);
+ int imm = rm.AsInt32();
+ Debug.Assert((uint)imm == rm.Value);
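+ // ADD/SUB immediates are 12 bits wide, optionally shifted left by 12; e.g. 0x1000 encodes as imm = 1 with sh = 1.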
+ if (imm != 0 && (imm & 0xfff) == 0)
+ {
+ instI |= 1u << 22; // sh flag
+ imm >>= 12;
+ }
+ WriteInstructionAuto(instI | (EncodeUImm12(imm, 0) << 10), rd, rn);
+ }
+ else
+ {
+ instR |= EncodeUImm6(shiftAmount) << 10;
+ instR |= (uint)shiftType << 22;
+
+ WriteInstructionRm16Auto(instR, rd, rn, rm);
+ }
+ }
+
+ private void WriteInstructionAuto(
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmExtensionType extensionType,
+ int shiftAmount = 0)
+ {
+ Debug.Assert((uint)shiftAmount <= 4);
+
+ instruction |= (uint)shiftAmount << 10;
+ instruction |= (uint)extensionType << 13;
+
+ WriteInstructionRm16Auto(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionBitwiseAuto(
+ uint instI,
+ uint instR,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmShiftType shiftType = ArmShiftType.Lsl,
+ int shiftAmount = 0)
+ {
+ if (rm.Kind == OperandKind.Constant && rm.Value != 0)
+ {
+ Debug.Assert(shiftAmount == 0);
+ bool canEncode = CodeGenCommon.TryEncodeBitMask(rm, out int immN, out int immS, out int immR);
+ Debug.Assert(canEncode);
+ uint instruction = instI | ((uint)immS << 10) | ((uint)immR << 16) | ((uint)immN << 22);
+
+ WriteInstructionAuto(instruction, rd, rn);
+ }
+ else
+ {
+ WriteInstructionBitwiseAuto(instR, rd, rn, rm, shiftType, shiftAmount);
+ }
+ }
+
+ private void WriteInstructionBitwiseAuto(
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmShiftType shiftType = ArmShiftType.Lsl,
+ int shiftAmount = 0)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ instruction |= EncodeUImm6(shiftAmount) << 10;
+ instruction |= (uint)shiftType << 22;
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionLdrStrAuto(
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmExtensionType extensionType,
+ bool shift)
+ {
+ if (shift)
+ {
+ instruction |= 1u << 12;
+ }
+
+ instruction |= (uint)extensionType << 13;
+
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= 1u << 30;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionAuto(uint instruction, Operand rd)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstruction(instruction, rd);
+ }
+
+ public void WriteInstructionAuto(uint instruction, Operand rd, Operand rn)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstruction(instruction, rd, rn);
+ }
+
+ private void WriteInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstruction(instruction, rd, rn, rm, ra);
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd)
+ {
+ WriteUInt32(instruction | EncodeReg(rd));
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 10));
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(ra) << 10) | (EncodeReg(rm) << 16));
+ }
+
+ private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn)
+ {
+ if (rd.Type == OperandType.FP64)
+ {
+ instruction |= 1u << 22;
+ }
+
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ if (rd.Type == OperandType.FP64)
+ {
+ instruction |= 1u << 22;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteSimdInstruction(uint instruction, Operand rd, Operand rn, Operand rm, bool q = true)
+ {
+ if (q)
+ {
+ instruction |= 1u << 30;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionRm16Auto(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ public void WriteInstructionRm16(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16));
+ }
+
+ public void WriteInstructionRm16NoRet(uint instruction, Operand rn, Operand rm)
+ {
+ WriteUInt32(instruction | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16));
+ }
+
+ private static uint GetLdpStpInstruction(uint intInst, uint vecInst, int imm, OperandType type)
+ {
+ uint instruction;
+ int scale;
+
+ if (type.IsInteger())
+ {
+ instruction = intInst;
+
+ if (type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ scale = 3;
+ }
+ else
+ {
+ scale = 2;
+ }
+ }
+ else
+ {
+ int opc = type switch
+ {
+ OperandType.FP32 => 0,
+ OperandType.FP64 => 1,
+ _ => 2,
+ };
+
+ instruction = vecInst | ((uint)opc << 30);
+ scale = 2 + opc;
+ }
+
+ instruction |= (EncodeSImm7(imm, scale) << 15);
+
+ return instruction;
+ }
+
+ private static uint GetLdrStrInstruction(uint intInst, uint vecInst, OperandType type)
+ {
+ uint instruction;
+
+ if (type.IsInteger())
+ {
+ instruction = intInst;
+
+ if (type == OperandType.I64)
+ {
+ instruction |= 1u << 30;
+ }
+ }
+ else
+ {
+ instruction = vecInst;
+
+ if (type == OperandType.V128)
+ {
+ instruction |= 1u << 23;
+ }
+ else
+ {
+ instruction |= type == OperandType.FP32 ? 2u << 30 : 3u << 30;
+ }
+ }
+
+ return instruction;
+ }
+
+ private static uint EncodeIndexSizeImm5(int index, int size)
+ {
+ Debug.Assert((uint)size < 4);
+ Debug.Assert((uint)index < (16u >> size), $"Invalid index {index} and size {size} combination.");
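+ // imm5 packs the lane index above a single set bit marking the element size, e.g. index 3 with size 1
+ // (16-bit lanes) yields (3 << 2) | (1 << 1) = 0b01110.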
+ return ((uint)index << (size + 1)) | (1u << size);
+ }
+
+ private static uint EncodeSImm7(int value, int scale)
+ {
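+ // imm7 is stored scaled by the access size, e.g. a -16 byte offset for an X-register pair (scale 3)
+ // encodes as (-16 >> 3) & 0x7f = 0x7e.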
+ uint imm = (uint)(value >> scale) & 0x7f;
+ Debug.Assert(((int)imm << 25) >> (25 - scale) == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+ return imm;
+ }
+
+ private static uint EncodeSImm9(int value)
+ {
+ uint imm = (uint)value & 0x1ff;
+ Debug.Assert(((int)imm << 23) >> 23 == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
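+ // The *_2 encoders store word-scaled offsets: imm19 is used by B.cond, CBZ and LDR (literal), imm26 by B and BL.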
+ private static uint EncodeSImm19_2(int value)
+ {
+ uint imm = (uint)(value >> 2) & 0x7ffff;
+ Debug.Assert(((int)imm << 13) >> 11 == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeSImm26_2(int value)
+ {
+ uint imm = (uint)(value >> 2) & 0x3ffffff;
+ Debug.Assert(((int)imm << 6) >> 4 == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm4(int value)
+ {
+ uint imm = (uint)value & 0xf;
+ Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm6(int value)
+ {
+ uint imm = (uint)value & 0x3f;
+ Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm12(int value, OperandType type)
+ {
+ return EncodeUImm12(value, GetScaleForType(type));
+ }
+
+ private static uint EncodeUImm12(int value, int scale)
+ {
+ uint imm = (uint)(value >> scale) & 0xfff;
+ Debug.Assert((int)imm << scale == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm16(int value)
+ {
+ uint imm = (uint)value & 0xffff;
+ Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeReg(Operand reg)
+ {
+ if (reg.Kind == OperandKind.Constant && reg.Value == 0)
+ {
+ return ZrRegister;
+ }
+
+ Debug.Assert(reg.Kind == OperandKind.Register);
+ uint regIndex = (uint)reg.GetRegister().Index;
+ Debug.Assert(regIndex < 32);
+ return regIndex;
+ }
+
+ public static int GetScaleForType(OperandType type)
+ {
+ return type switch
+ {
+ OperandType.I32 => 2,
+ OperandType.I64 => 3,
+ OperandType.FP32 => 2,
+ OperandType.FP64 => 3,
+ OperandType.V128 => 4,
+ _ => throw new ArgumentException($"Invalid type {type}."),
+ };
+ }
+
+#pragma warning disable IDE0051 // Remove unused private member
+ private void WriteInt16(short value)
+ {
+ WriteUInt16((ushort)value);
+ }
+
+ private void WriteInt32(int value)
+ {
+ WriteUInt32((uint)value);
+ }
+
+ private void WriteByte(byte value)
+ {
+ _stream.WriteByte(value);
+ }
+#pragma warning restore IDE0051
+
+ private void WriteUInt16(ushort value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ }
+
+ private void WriteUInt32(uint value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs b/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
new file mode 100644
index 0000000..a487c2e
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
@@ -0,0 +1,96 @@
+using System;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CallingConvention
+ {
+ private const int RegistersMask = unchecked((int)0xffffffff);
+
+ // Some of these registers have specific roles and can't be used as general purpose registers.
+ // X17 - Reserved for the code generator (CodeGenCommon.ReservedRegister).
+ // X18 - Reserved for platform specific usage.
+ // X29 - Frame pointer.
+ // X30 - Return address.
+ // X31 - Not an actual register; it maps to SP in some contexts and to ZR in others.
+ private const int ReservedRegsMask = (1 << CodeGenCommon.ReservedRegister) | (1 << 18) | (1 << 29) | (1 << 30) | (1 << 31);
+
+ public static int GetIntAvailableRegisters()
+ {
+ return RegistersMask & ~ReservedRegsMask;
+ }
+
+ public static int GetVecAvailableRegisters()
+ {
+ return RegistersMask;
+ }
+
+ public static int GetIntCallerSavedRegisters()
+ {
+ return (GetIntCalleeSavedRegisters() ^ RegistersMask) & ~ReservedRegsMask;
+ }
+
+ public static int GetFpCallerSavedRegisters()
+ {
+ return GetFpCalleeSavedRegisters() ^ RegistersMask;
+ }
+
+ public static int GetVecCallerSavedRegisters()
+ {
+ return GetVecCalleeSavedRegisters() ^ RegistersMask;
+ }
+
+ public static int GetIntCalleeSavedRegisters()
+ {
+ return 0x1ff80000; // X19 to X28
+ }
+
+ public static int GetFpCalleeSavedRegisters()
+ {
+ return 0xff00; // D8 to D15
+ }
+
+ public static int GetVecCalleeSavedRegisters()
+ {
+ return 0;
+ }
+
+ public static int GetArgumentsOnRegsCount()
+ {
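+ // AAPCS64: the first eight arguments are passed in registers (X0-X7 for integers, V0-V7 for vectors).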
+ return 8;
+ }
+
+ public static int GetIntArgumentRegister(int index)
+ {
+ if ((uint)index < (uint)GetArgumentsOnRegsCount())
+ {
+ return index;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static int GetVecArgumentRegister(int index)
+ {
+ if ((uint)index < (uint)GetArgumentsOnRegsCount())
+ {
+ return index;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static int GetIntReturnRegister()
+ {
+ return 0;
+ }
+
+ public static int GetIntReturnRegisterHigh()
+ {
+ return 1;
+ }
+
+ public static int GetVecReturnRegister()
+ {
+ return 0;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
new file mode 100644
index 0000000..1f0148d
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
@@ -0,0 +1,91 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Numerics;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CodeGenCommon
+ {
+ public const int TcAddressRegister = 8;
+ public const int ReservedRegister = 17;
+
+ public static bool ConstFitsOnSImm7(int value, int scale)
+ {
+ return (((value >> scale) << 25) >> (25 - scale)) == value;
+ }
+
+ public static bool ConstFitsOnSImm9(int value)
+ {
+ return ((value << 23) >> 23) == value;
+ }
+
+ public static bool ConstFitsOnUImm12(int value)
+ {
+ return (value & 0xfff) == value;
+ }
+
+ public static bool ConstFitsOnUImm12(int value, OperandType type)
+ {
+ int scale = Assembler.GetScaleForType(type);
+ return (((value >> scale) & 0xfff) << scale) == value;
+ }
+
+ public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
+ {
+ return TryEncodeBitMask(operand.Type, operand.Value, out immN, out immS, out immR);
+ }
+
+ public static bool TryEncodeBitMask(OperandType type, ulong value, out int immN, out int immS, out int immR)
+ {
+ if (type == OperandType.I32)
+ {
+ value |= value << 32;
+ }
+
+ return TryEncodeBitMask(value, out immN, out immS, out immR);
+ }
+
+ public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR)
+ {
+ // Some special values also can't be encoded:
+ // 0 can't be encoded because we need to subtract 1 from onesCount (which would become negative for 0).
+ // A value with all bits set can't be encoded because it is reserved according to the spec:
+ // Any value AND all ones equals itself, so it's effectively a no-op.
+ // Any value OR all ones equals all ones, so one can just use MOV.
+ // Any value XOR all ones equals its inverse, so one can just use MVN.
+ if (value == 0 || value == ulong.MaxValue)
+ {
+ immN = 0;
+ immS = 0;
+ immR = 0;
+
+ return false;
+ }
+
+ // Normalize the value, rotating it such that the LSB is 1: this ensures we get a complete element that
+ // has not been cut in half across the word boundary.
+ int rotation = BitOperations.TrailingZeroCount(value & (value + 1));
+ ulong rotatedValue = ulong.RotateRight(value, rotation);
+
+ // Now that we have a complete element at the bottom with the LSB = 1, determine the element size and
+ // the number of ones in the element.
+ int elementSize = BitOperations.TrailingZeroCount(rotatedValue & (rotatedValue + 1));
+ int onesInElement = BitOperations.TrailingZeroCount(~rotatedValue);
+
+ // Check that the value is repeating; this also ensures the element size is a power of two.
+ if (ulong.RotateRight(value, elementSize) != value)
+ {
+ immN = 0;
+ immS = 0;
+ immR = 0;
+
+ return false;
+ }
+
+ immN = (elementSize >> 6) & 1;
+ immS = (((~elementSize + 1) << 1) | (onesInElement - 1)) & 0x3f;
+ immR = (elementSize - rotation) & (elementSize - 1);
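+ // Worked example (illustrative): value 0x0000FFFF0000FFFF rotates onto itself (rotation = 32), giving
+ // elementSize = 32 and onesInElement = 16, hence immN = 0, immS = 15, immR = 0.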
+
+ return true;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
new file mode 100644
index 0000000..89b1e9e
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
@@ -0,0 +1,287 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using Ryujinx.Common.Memory;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ class CodeGenContext
+ {
+ private const int BccInstLength = 4;
+ private const int CbnzInstLength = 4;
+ private const int LdrLitInstLength = 4;
+
+ private readonly Stream _stream;
+
+ public int StreamOffset => (int)_stream.Length;
+
+ public AllocationResult AllocResult { get; }
+
+ public Assembler Assembler { get; }
+
+ public BasicBlock CurrBlock { get; private set; }
+
+ public bool HasCall { get; }
+
+ public int CallArgsRegionSize { get; }
+ public int FpLrSaveRegionSize { get; }
+
+ private readonly Dictionary<BasicBlock, long> _visitedBlocks;
+ private readonly Dictionary<BasicBlock, List<(ArmCondition, long)>> _pendingBranches;
+
+ private readonly struct ConstantPoolEntry
+ {
+ public readonly int Offset;
+ public readonly Symbol Symbol;
+ public readonly List<(Operand, int)> LdrOffsets;
+
+ public ConstantPoolEntry(int offset, Symbol symbol)
+ {
+ Offset = offset;
+ Symbol = symbol;
+ LdrOffsets = new List<(Operand, int)>();
+ }
+ }
+
+ private readonly Dictionary<ulong, ConstantPoolEntry> _constantPool;
+
+ private bool _constantPoolWritten;
+ private long _constantPoolOffset;
+
+ private ArmCondition _jNearCondition;
+ private Operand _jNearValue;
+
+ private long _jNearPosition;
+
+ private readonly bool _relocatable;
+
+ public CodeGenContext(AllocationResult allocResult, int maxCallArgs, bool relocatable)
+ {
+ _stream = MemoryStreamManager.Shared.GetStream();
+
+ AllocResult = allocResult;
+
+ Assembler = new Assembler(_stream);
+
+ bool hasCall = maxCallArgs >= 0;
+
+ HasCall = hasCall;
+
+ if (maxCallArgs < 0)
+ {
+ maxCallArgs = 0;
+ }
+
+ CallArgsRegionSize = maxCallArgs * 16;
+ FpLrSaveRegionSize = hasCall ? 16 : 0;
+
+ _visitedBlocks = new Dictionary<BasicBlock, long>();
+ _pendingBranches = new Dictionary<BasicBlock, List<(ArmCondition, long)>>();
+ _constantPool = new Dictionary<ulong, ConstantPoolEntry>();
+
+ _relocatable = relocatable;
+ }
+
+ public void EnterBlock(BasicBlock block)
+ {
+ CurrBlock = block;
+
+ long target = _stream.Position;
+
+ if (_pendingBranches.TryGetValue(block, out var list))
+ {
+ foreach ((ArmCondition condition, long branchPos) in list)
+ {
+ _stream.Seek(branchPos, SeekOrigin.Begin);
+ WriteBranch(condition, target);
+ }
+
+ _stream.Seek(target, SeekOrigin.Begin);
+ _pendingBranches.Remove(block);
+ }
+
+ _visitedBlocks.Add(block, target);
+ }
+
+ public void JumpTo(BasicBlock target)
+ {
+ JumpTo(ArmCondition.Al, target);
+ }
+
+ public void JumpTo(ArmCondition condition, BasicBlock target)
+ {
+ if (_visitedBlocks.TryGetValue(target, out long offset))
+ {
+ WriteBranch(condition, offset);
+ }
+ else
+ {
+ if (!_pendingBranches.TryGetValue(target, out var list))
+ {
+ list = new List<(ArmCondition, long)>();
+ _pendingBranches.Add(target, list);
+ }
+
+ list.Add((condition, _stream.Position));
+
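+ // Reserve space for the branch instruction; EnterBlock patches it once the target position is known.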
+ _stream.Seek(BccInstLength, SeekOrigin.Current);
+ }
+ }
+
+ private void WriteBranch(ArmCondition condition, long to)
+ {
+ int imm = checked((int)(to - _stream.Position));
+
+ if (condition != ArmCondition.Al)
+ {
+ Assembler.B(condition, imm);
+ }
+ else
+ {
+ Assembler.B(imm);
+ }
+ }
+
+ public void JumpToNear(ArmCondition condition)
+ {
+ _jNearCondition = condition;
+ _jNearPosition = _stream.Position;
+
+ _stream.Seek(BccInstLength, SeekOrigin.Current);
+ }
+
+ public void JumpToNearIfNotZero(Operand value)
+ {
+ _jNearValue = value;
+ _jNearPosition = _stream.Position;
+
+ _stream.Seek(CbnzInstLength, SeekOrigin.Current);
+ }
+
+ public void JumpHere()
+ {
+ long currentPosition = _stream.Position;
+ long offset = currentPosition - _jNearPosition;
+
+ _stream.Seek(_jNearPosition, SeekOrigin.Begin);
+
+ if (_jNearValue != default)
+ {
+ Assembler.Cbnz(_jNearValue, checked((int)offset));
+ _jNearValue = default;
+ }
+ else
+ {
+ Assembler.B(_jNearCondition, checked((int)offset));
+ }
+
+ _stream.Seek(currentPosition, SeekOrigin.Begin);
+ }
+
+ public void ReserveRelocatableConstant(Operand rt, Symbol symbol, ulong value)
+ {
+ if (!_constantPool.TryGetValue(value, out ConstantPoolEntry cpe))
+ {
+ cpe = new ConstantPoolEntry(_constantPool.Count * sizeof(ulong), symbol);
+ _constantPool.Add(value, cpe);
+ }
+
+ cpe.LdrOffsets.Add((rt, (int)_stream.Position));
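+ // Reserve space for the LDR (literal); WriteConstantPool rewrites it later with the PC-relative pool offset.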
+ _stream.Seek(LdrLitInstLength, SeekOrigin.Current);
+ }
+
+ private long WriteConstantPool()
+ {
+ if (_constantPoolWritten)
+ {
+ return _constantPoolOffset;
+ }
+
+ long constantPoolBaseOffset = _stream.Position;
+
+ foreach (ulong value in _constantPool.Keys)
+ {
+ WriteUInt64(value);
+ }
+
+ foreach (ConstantPoolEntry cpe in _constantPool.Values)
+ {
+ foreach ((Operand rt, int ldrOffset) in cpe.LdrOffsets)
+ {
+ _stream.Seek(ldrOffset, SeekOrigin.Begin);
+
+ int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
+ int pcRelativeOffset = absoluteOffset - ldrOffset;
+
+ Assembler.LdrLit(rt, pcRelativeOffset);
+ }
+ }
+
+ _stream.Seek(constantPoolBaseOffset + _constantPool.Count * sizeof(ulong), SeekOrigin.Begin);
+
+ _constantPoolOffset = constantPoolBaseOffset;
+ _constantPoolWritten = true;
+
+ return constantPoolBaseOffset;
+ }
+
+ public (byte[], RelocInfo) GetCode()
+ {
+ long constantPoolBaseOffset = WriteConstantPool();
+
+ byte[] code = new byte[_stream.Length];
+
+ long originalPosition = _stream.Position;
+
+ _stream.Seek(0, SeekOrigin.Begin);
+ _stream.ReadExactly(code, 0, code.Length);
+ _stream.Seek(originalPosition, SeekOrigin.Begin);
+
+ RelocInfo relocInfo;
+
+ if (_relocatable)
+ {
+ RelocEntry[] relocs = new RelocEntry[_constantPool.Count];
+
+ int index = 0;
+
+ foreach (ConstantPoolEntry cpe in _constantPool.Values)
+ {
+ if (cpe.Symbol.Type != SymbolType.None)
+ {
+ int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
+ relocs[index++] = new RelocEntry(absoluteOffset, cpe.Symbol);
+ }
+ }
+
+ if (index != relocs.Length)
+ {
+ Array.Resize(ref relocs, index);
+ }
+
+ relocInfo = new RelocInfo(relocs);
+ }
+ else
+ {
+ relocInfo = new RelocInfo(Array.Empty<RelocEntry>());
+ }
+
+ return (code, relocInfo);
+ }
+
+ private void WriteUInt64(ulong value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ _stream.WriteByte((byte)(value >> 32));
+ _stream.WriteByte((byte)(value >> 40));
+ _stream.WriteByte((byte)(value >> 48));
+ _stream.WriteByte((byte)(value >> 56));
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
new file mode 100644
index 0000000..2df8667
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
@@ -0,0 +1,1581 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+using static ARMeilleure.IntermediateRepresentation.Operand;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CodeGenerator
+ {
+ private const int DWordScale = 3;
+
+ private const int RegistersCount = 32;
+
+ private const int FpRegister = 29;
+ private const int LrRegister = 30;
+ private const int SpRegister = 31;
+ private const int ZrRegister = 31;
+
+ private enum AccessSize
+ {
+ Byte,
+ Hword,
+ Auto,
+ }
+
+ private static readonly Action<CodeGenContext, Operation>[] _instTable;
+
+ static CodeGenerator()
+ {
+ _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];
+
+#pragma warning disable IDE0055 // Disable formatting
+ Add(Instruction.Add, GenerateAdd);
+ Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+ Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+ Add(Instruction.BitwiseNot, GenerateBitwiseNot);
+ Add(Instruction.BitwiseOr, GenerateBitwiseOr);
+ Add(Instruction.BranchIf, GenerateBranchIf);
+ Add(Instruction.ByteSwap, GenerateByteSwap);
+ Add(Instruction.Call, GenerateCall);
+ // Add(Instruction.Clobber, GenerateClobber);
+ Add(Instruction.Compare, GenerateCompare);
+ Add(Instruction.CompareAndSwap, GenerateCompareAndSwap);
+ Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16);
+ Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8);
+ Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
+ Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32);
+ Add(Instruction.ConvertToFP, GenerateConvertToFP);
+ Add(Instruction.ConvertToFPUI, GenerateConvertToFPUI);
+ Add(Instruction.Copy, GenerateCopy);
+ Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
+ Add(Instruction.Divide, GenerateDivide);
+ Add(Instruction.DivideUI, GenerateDivideUI);
+ Add(Instruction.Fill, GenerateFill);
+ Add(Instruction.Load, GenerateLoad);
+ Add(Instruction.Load16, GenerateLoad16);
+ Add(Instruction.Load8, GenerateLoad8);
+ Add(Instruction.MemoryBarrier, GenerateMemoryBarrier);
+ Add(Instruction.Multiply, GenerateMultiply);
+ Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI);
+ Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI);
+ Add(Instruction.Negate, GenerateNegate);
+ Add(Instruction.Return, GenerateReturn);
+ Add(Instruction.RotateRight, GenerateRotateRight);
+ Add(Instruction.ShiftLeft, GenerateShiftLeft);
+ Add(Instruction.ShiftRightSI, GenerateShiftRightSI);
+ Add(Instruction.ShiftRightUI, GenerateShiftRightUI);
+ Add(Instruction.SignExtend16, GenerateSignExtend16);
+ Add(Instruction.SignExtend32, GenerateSignExtend32);
+ Add(Instruction.SignExtend8, GenerateSignExtend8);
+ Add(Instruction.Spill, GenerateSpill);
+ Add(Instruction.SpillArg, GenerateSpillArg);
+ Add(Instruction.StackAlloc, GenerateStackAlloc);
+ Add(Instruction.Store, GenerateStore);
+ Add(Instruction.Store16, GenerateStore16);
+ Add(Instruction.Store8, GenerateStore8);
+ Add(Instruction.Subtract, GenerateSubtract);
+ Add(Instruction.Tailcall, GenerateTailcall);
+ Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
+ Add(Instruction.VectorExtract, GenerateVectorExtract);
+ Add(Instruction.VectorExtract16, GenerateVectorExtract16);
+ Add(Instruction.VectorExtract8, GenerateVectorExtract8);
+ Add(Instruction.VectorInsert, GenerateVectorInsert);
+ Add(Instruction.VectorInsert16, GenerateVectorInsert16);
+ Add(Instruction.VectorInsert8, GenerateVectorInsert8);
+ Add(Instruction.VectorOne, GenerateVectorOne);
+ Add(Instruction.VectorZero, GenerateVectorZero);
+ Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64);
+ Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96);
+ Add(Instruction.ZeroExtend16, GenerateZeroExtend16);
+ Add(Instruction.ZeroExtend32, GenerateZeroExtend32);
+ Add(Instruction.ZeroExtend8, GenerateZeroExtend8);
+#pragma warning restore IDE0055
+
+ static void Add(Instruction inst, Action<CodeGenContext, Operation> func)
+ {
+ _instTable[(int)inst] = func;
+ }
+ }
+
+ public static CompiledFunction Generate(CompilerContext cctx)
+ {
+ ControlFlowGraph cfg = cctx.Cfg;
+
+ Logger.StartPass(PassName.Optimization);
+
+ if (cctx.Options.HasFlag(CompilerOptions.Optimize))
+ {
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Optimizer.RunPass(cfg);
+ }
+
+ BlockPlacement.RunPass(cfg);
+ }
+
+ Arm64Optimizer.RunPass(cfg);
+
+ Logger.EndPass(PassName.Optimization, cfg);
+
+ Logger.StartPass(PassName.PreAllocation);
+
+ StackAllocator stackAlloc = new();
+
+ PreAllocator.RunPass(cctx, out int maxCallArgs);
+
+ Logger.EndPass(PassName.PreAllocation, cfg);
+
+ Logger.StartPass(PassName.RegisterAllocation);
+
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Ssa.Deconstruct(cfg);
+ }
+
+ IRegisterAllocator regAlloc;
+
+ if (cctx.Options.HasFlag(CompilerOptions.Lsra))
+ {
+ regAlloc = new LinearScanAllocator();
+ }
+ else
+ {
+ regAlloc = new HybridAllocator();
+ }
+
+ RegisterMasks regMasks = new(
+ CallingConvention.GetIntAvailableRegisters(),
+ CallingConvention.GetVecAvailableRegisters(),
+ CallingConvention.GetIntCallerSavedRegisters(),
+ CallingConvention.GetVecCallerSavedRegisters(),
+ CallingConvention.GetIntCalleeSavedRegisters(),
+ CallingConvention.GetVecCalleeSavedRegisters(),
+ RegistersCount);
+
+ AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
+
+ Logger.EndPass(PassName.RegisterAllocation, cfg);
+
+ Logger.StartPass(PassName.CodeGeneration);
+
+ bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
+
+ CodeGenContext context = new(allocResult, maxCallArgs, relocatable);
+
+ UnwindInfo unwindInfo = WritePrologue(context);
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ context.EnterBlock(block);
+
+ for (Operation node = block.Operations.First; node != default;)
+ {
+ node = GenerateOperation(context, node);
+ }
+
+ if (block.SuccessorsCount == 0)
+ {
+ // The only blocks which can have 0 successors are exit blocks.
+ Operation last = block.Operations.Last;
+
+ Debug.Assert(last.Instruction == Instruction.Tailcall ||
+ last.Instruction == Instruction.Return);
+ }
+ else
+ {
+ BasicBlock succ = block.GetSuccessor(0);
+
+ if (succ != block.ListNext)
+ {
+ context.JumpTo(succ);
+ }
+ }
+ }
+
+ (byte[] code, RelocInfo relocInfo) = context.GetCode();
+
+ Logger.EndPass(PassName.CodeGeneration);
+
+ return new CompiledFunction(code, unwindInfo, relocInfo);
+ }
+
+ private static Operation GenerateOperation(CodeGenContext context, Operation operation)
+ {
+ if (operation.Instruction == Instruction.Extended)
+ {
+ CodeGeneratorIntrinsic.GenerateOperation(context, operation);
+ }
+ else
+ {
+ if (IsLoadOrStore(operation) &&
+ operation.ListNext != default &&
+ operation.ListNext.Instruction == operation.Instruction &&
+ TryPairMemoryOp(context, operation, operation.ListNext))
+ {
+ // Skip the next operation if we managed to pair them.
+ return operation.ListNext.ListNext;
+ }
+
+ Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction];
+
+ if (func != null)
+ {
+ func(context, operation);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+ }
+ }
+
+ return operation.ListNext;
+ }
+
+ private static void GenerateAdd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ // ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Add(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.FaddScalar(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.And(dest, src1, src2);
+ }
+
+ private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Eor(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.EorVector(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Mvn(dest, source);
+ }
+
+ private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Orr(dest, src1, src2);
+ }
+
+ private static void GenerateBranchIf(CodeGenContext context, Operation operation)
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToArmCondition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.JumpTo(cond, context.CurrBlock.GetSuccessor(1));
+ }
+
+ private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Rev(dest, source);
+ }
+
+ private static void GenerateCall(CodeGenContext context, Operation operation)
+ {
+ context.Assembler.Blr(operation.GetSource(0));
+ }
+
+ private static void GenerateCompare(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(dest.Type == OperandType.I32);
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToArmCondition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.Assembler.Cset(dest, cond);
+ }
+
+ private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation)
+ {
+ if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3.
+ {
+ Operand actualLow = operation.GetDestination(0);
+ Operand actualHigh = operation.GetDestination(1);
+ Operand temp0 = operation.GetDestination(2);
+ Operand temp1 = operation.GetDestination(3);
+ Operand address = operation.GetSource(0);
+ Operand expectedLow = operation.GetSource(1);
+ Operand expectedHigh = operation.GetSource(2);
+ Operand desiredLow = operation.GetSource(3);
+ Operand desiredHigh = operation.GetSource(4);
+
+ GenerateAtomicDcas(
+ context,
+ address,
+ expectedLow,
+ expectedHigh,
+ desiredLow,
+ desiredHigh,
+ actualLow,
+ actualHigh,
+ temp0,
+ temp1);
+ }
+ else
+ {
+ Operand actual = operation.GetDestination(0);
+ Operand result = operation.GetDestination(1);
+ Operand address = operation.GetSource(0);
+ Operand expected = operation.GetSource(1);
+ Operand desired = operation.GetSource(2);
+
+ GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Auto);
+ }
+ }
+
+ private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation)
+ {
+ Operand actual = operation.GetDestination(0);
+ Operand result = operation.GetDestination(1);
+ Operand address = operation.GetSource(0);
+ Operand expected = operation.GetSource(1);
+ Operand desired = operation.GetSource(2);
+
+ GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Hword);
+ }
+
+ private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation)
+ {
+ Operand actual = operation.GetDestination(0);
+ Operand result = operation.GetDestination(1);
+ Operand address = operation.GetSource(0);
+ Operand expected = operation.GetSource(1);
+ Operand desired = operation.GetSource(2);
+
+ GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Byte);
+ }
+
+ private static void GenerateCompareCommon(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(src1, src2);
+
+ Debug.Assert(src1.Type.IsInteger());
+
+ context.Assembler.Cmp(src1, src2);
+ }
+
+ private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(dest, src2, src3);
+
+ Debug.Assert(dest.Type.IsInteger());
+ Debug.Assert(src1.Type == OperandType.I32);
+
+ context.Assembler.Cmp(src1, Const(src1.Type, 0));
+ context.Assembler.Csel(dest, src2, src3, ArmCondition.Ne);
+ }
+
+ private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64);
+
+ context.Assembler.Mov(dest, Register(source, OperandType.I32));
+ }
+
+ private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64);
+ Debug.Assert(dest.Type != source.Type);
+ Debug.Assert(source.Type != OperandType.V128);
+
+ if (source.Type.IsInteger())
+ {
+ context.Assembler.ScvtfScalar(dest, source);
+ }
+ else
+ {
+ context.Assembler.FcvtScalar(dest, source);
+ }
+ }
+
+ private static void GenerateConvertToFPUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64);
+ Debug.Assert(dest.Type != source.Type);
+ Debug.Assert(source.Type.IsInteger());
+
+ context.Assembler.UcvtfScalar(dest, source);
+ }
+
+ private static void GenerateCopy(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant);
+
+ // Moves to the same register are useless.
+ if (dest.Kind == source.Kind && dest.Value == source.Value)
+ {
+ return;
+ }
+
+ if (dest.Kind == OperandKind.Register && source.Kind == OperandKind.Constant)
+ {
+ if (source.Relocatable)
+ {
+ context.ReserveRelocatableConstant(dest, source.Symbol, source.Value);
+ }
+ else
+ {
+ GenerateConstantCopy(context, dest, source.Value);
+ }
+ }
+ else
+ {
+ context.Assembler.Mov(dest, source);
+ }
+ }
+
+ private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Clz(dest, source);
+ }
+
+ private static void GenerateDivide(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand dividend = operation.GetSource(0);
+ Operand divisor = operation.GetSource(1);
+
+ ValidateBinOp(dest, dividend, divisor);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Sdiv(dest, dividend, divisor);
+ }
+ else
+ {
+ context.Assembler.FdivScalar(dest, dividend, divisor);
+ }
+ }
+
+ private static void GenerateDivideUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand dividend = operation.GetSource(0);
+ Operand divisor = operation.GetSource(1);
+
+ ValidateBinOp(dest, dividend, divisor);
+
+ context.Assembler.Udiv(dest, dividend, divisor);
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = operation.GetSource(0);
+
+ context.Assembler.Ldr(value, address);
+ }
+
+ private static void GenerateLoad16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.LdrhRiUn(value, address, 0);
+ }
+
+ private static void GenerateLoad8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.LdrbRiUn(value, address, 0);
+ }
+
+ private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation)
+ {
+ context.Assembler.Dmb(0xf);
+ }
+
+ private static void GenerateMultiply(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Mul(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.FmulScalar(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1, src2);
+
+ Debug.Assert(dest.Type == OperandType.I64);
+
+ context.Assembler.Smulh(dest, src1, src2);
+ }
+
+ private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1, src2);
+
+ Debug.Assert(dest.Type == OperandType.I64);
+
+ context.Assembler.Umulh(dest, src1, src2);
+ }
+
+ private static void GenerateNegate(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Neg(dest, source);
+ }
+ else
+ {
+ context.Assembler.FnegScalar(dest, source);
+ }
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operand value, Operand address, int offset)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
+ {
+ context.Assembler.LdrRiUn(value, address, offset);
+ }
+ else if (CodeGenCommon.ConstFitsOnSImm9(offset))
+ {
+ context.Assembler.Ldur(value, address, offset);
+ }
+ else
+ {
+ Operand tempAddress = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempAddress, (ulong)offset);
+ context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input.
+ context.Assembler.LdrRiUn(value, tempAddress, 0);
+ }
+ }
+
+ private static void GenerateReturn(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Ret(Register(LrRegister));
+ }
+
+ private static void GenerateRotateRight(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Ror(dest, src1, src2);
+ }
+
+ private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Lsl(dest, src1, src2);
+ }
+
+ private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Asr(dest, src1, src2);
+ }
+
+ private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Lsr(dest, src1, src2);
+ }
+
+ private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Sxth(dest, source);
+ }
+
+ private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Sxtw(dest, source);
+ }
+
+ private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Sxtb(dest, source);
+ }
+
+ private static void GenerateFill(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;
+
+ GenerateLoad(context, dest, Register(SpRegister), offs);
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operand value, Operand address, int offset)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
+ {
+ context.Assembler.StrRiUn(value, address, offset);
+ }
+ else if (CodeGenCommon.ConstFitsOnSImm9(offset))
+ {
+ context.Assembler.Stur(value, address, offset);
+ }
+ else
+ {
+ Operand tempAddress = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempAddress, (ulong)offset);
+ context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input.
+ context.Assembler.StrRiUn(value, tempAddress, 0);
+ }
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, context.CallArgsRegionSize + context.FpLrSaveRegionSize);
+ }
+
+ private static void GenerateSpillArg(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, 0);
+ }
+
+ private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;
+
+ context.Assembler.Add(dest, Register(SpRegister), Const(dest.Type, offs));
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = operation.GetSource(0);
+
+ context.Assembler.Str(value, address);
+ }
+
+ private static void GenerateStore16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.StrhRiUn(value, address, 0);
+ }
+
+ private static void GenerateStore8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.StrbRiUn(value, address, 0);
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
+ {
+ Operand offset = operation.GetSource(0);
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + baseOffset;
+
+ GenerateStore(context, source, Register(SpRegister), offs);
+ }
+
+ private static void GenerateSubtract(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ // ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Sub(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.FsubScalar(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateTailcall(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Br(operation.GetSource(0));
+ }
+
+ private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ if (dest != default)
+ {
+ Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger());
+
+ OperandType destType = source.Type == OperandType.I64 ? OperandType.FP64 : OperandType.FP32;
+
+ context.Assembler.Fmov(Register(dest, destType), source, topHalf: false);
+ }
+ }
+
+ private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes());
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Umov(dest, src1, index, dest.Type == OperandType.I64 ? 3 : 2);
+ }
+ else
+ {
+ context.Assembler.DupScalar(dest, src1, index, dest.Type == OperandType.FP64 ? 3 : 2);
+ }
+ }
+
+ private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 8);
+
+ context.Assembler.Umov(dest, src1, index, 1);
+ }
+
+ private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 16);
+
+ context.Assembler.Umov(dest, src1, index, 0);
+ }
+
+ private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ if (src2.Type.IsInteger())
+ {
+ context.Assembler.Ins(dest, src2, index, src2.Type == OperandType.I64 ? 3 : 2);
+ }
+ else
+ {
+ context.Assembler.Ins(dest, src2, 0, index, src2.Type == OperandType.FP64 ? 3 : 2);
+ }
+ }
+
+ private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Ins(dest, src2, index, 1);
+ }
+
+ private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Ins(dest, src2, index, 0);
+ }
+
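+ // CMEQ of a register against itself sets every lane to all ones.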
+ private static void GenerateVectorOne(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ context.Assembler.CmeqVector(dest, dest, dest, 2);
+ }
+
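+ // EOR of a register with itself is the canonical way to zero a vector.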
+ private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ context.Assembler.EorVector(dest, dest, dest);
+ }
+
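+ // Scalar FMOV writes zero the untouched upper vector bits, so moving the
+ // low D (or S, below) lane also clears bits 64..127 (or 32..127).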
+ private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+ context.Assembler.Fmov(Register(dest, OperandType.FP64), Register(source, OperandType.FP64));
+ }
+
+ private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+ context.Assembler.Fmov(Register(dest, OperandType.FP32), Register(source, OperandType.FP32));
+ }
+
+ private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Uxth(dest, source);
+ }
+
+ private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ // We can eliminate the move if source is already 32-bit and the registers are the same.
+ if (dest.Value == source.Value && source.Type == OperandType.I32)
+ {
+ return;
+ }
+
+ context.Assembler.Mov(Register(dest.GetRegister().Index, OperandType.I32), source);
+ }
+
+ private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Uxtb(dest, source);
+ }
+
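+ // Prologue layout, top of frame first: the callee-saved register region
+ // (16-byte aligned, SP pre-decremented by the first store), then FP/LR and
+ // the spill region, then the outgoing call arguments region at the bottom.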
+ private static UnwindInfo WritePrologue(CodeGenContext context)
+ {
+ List<UnwindPushEntry> pushEntries = new();
+
+ Operand rsp = Register(SpRegister);
+
+ int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+ int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+ int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+
+ int calleeSaveRegionSize = Align16(intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8);
+
+ int offset = 0;
+
+ WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+ WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64);
+
+ int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize);
+ int outArgsSize = context.CallArgsRegionSize;
+
+ if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale))
+ {
+ if (context.HasCall)
+ {
+ context.Assembler.StpRiPre(Register(FpRegister), Register(LrRegister), rsp, -localSize);
+ context.Assembler.MovSp(Register(FpRegister), rsp);
+ }
+
+ if (outArgsSize != 0)
+ {
+ context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, outArgsSize));
+ }
+ }
+ else
+ {
+ int frameSize = localSize + outArgsSize;
+ if (frameSize != 0)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(frameSize))
+ {
+ context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, frameSize));
+ }
+ else
+ {
+ Operand tempSize = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempSize, (ulong)frameSize);
+ context.Assembler.Sub(rsp, rsp, tempSize, ArmExtensionType.Uxtx);
+ }
+ }
+
+ context.Assembler.StpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize);
+
+ if (outArgsSize != 0)
+ {
+ context.Assembler.Add(Register(FpRegister), Register(SpRegister), Const(OperandType.I64, outArgsSize));
+ }
+ else
+ {
+ context.Assembler.MovSp(Register(FpRegister), Register(SpRegister));
+ }
+ }
+
+ return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset);
+ }
+
+ private static void WritePrologueCalleeSavesPreIndexed(
+ CodeGenContext context,
+ List<UnwindPushEntry> pushEntries,
+ ref int mask,
+ ref int offset,
+ int calleeSaveRegionSize,
+ OperandType type)
+ {
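+ // With an odd register count, store one register with STR first so the
+ // rest pair up into STP stores. Whichever store runs at offset 0 is
+ // pre-indexed and decrements SP by the whole aligned save region.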
+ if ((BitOperations.PopCount((uint)mask) & 1) != 0)
+ {
+ int reg = BitOperations.TrailingZeroCount(mask);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg));
+
+ mask &= ~(1 << reg);
+
+ if (offset != 0)
+ {
+ context.Assembler.StrRiUn(Register(reg, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.StrRiPre(Register(reg, type), Register(SpRegister), -calleeSaveRegionSize);
+ }
+
+ offset += type.GetSizeInBytes();
+ }
+
+ while (mask != 0)
+ {
+ int reg = BitOperations.TrailingZeroCount(mask);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg));
+
+ mask &= ~(1 << reg);
+
+ int reg2 = BitOperations.TrailingZeroCount(mask);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg2));
+
+ mask &= ~(1 << reg2);
+
+ if (offset != 0)
+ {
+ context.Assembler.StpRiUn(Register(reg, type), Register(reg2, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.StpRiPre(Register(reg, type), Register(reg2, type), Register(SpRegister), -calleeSaveRegionSize);
+ }
+
+ offset += type.GetSizeInBytes() * 2;
+ }
+ }
+
+ private static void WriteEpilogue(CodeGenContext context)
+ {
+ Operand rsp = Register(SpRegister);
+
+ int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize);
+ int outArgsSize = context.CallArgsRegionSize;
+
+ if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale))
+ {
+ if (outArgsSize != 0)
+ {
+ context.Assembler.Add(rsp, rsp, Const(OperandType.I64, outArgsSize));
+ }
+
+ if (context.HasCall)
+ {
+ context.Assembler.LdpRiPost(Register(FpRegister), Register(LrRegister), rsp, localSize);
+ }
+ }
+ else
+ {
+ if (context.HasCall)
+ {
+ context.Assembler.LdpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize);
+ }
+
+ int frameSize = localSize + outArgsSize;
+ if (frameSize != 0)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(frameSize))
+ {
+ context.Assembler.Add(rsp, rsp, Const(OperandType.I64, frameSize));
+ }
+ else
+ {
+ Operand tempSize = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempSize, (ulong)frameSize);
+ context.Assembler.Add(rsp, rsp, tempSize, ArmExtensionType.Uxtx);
+ }
+ }
+ }
+
+ int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+ int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+ int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+
+ int offset = intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8;
+ int calleeSaveRegionSize = Align16(offset);
+
+ WriteEpilogueCalleeSavesPostIndexed(context, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64);
+ WriteEpilogueCalleeSavesPostIndexed(context, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+ }
+
+ private static void WriteEpilogueCalleeSavesPostIndexed(
+ CodeGenContext context,
+ ref int mask,
+ ref int offset,
+ int calleeSaveRegionSize,
+ OperandType type)
+ {
+ while (mask != 0)
+ {
+ int reg = BitUtils.HighestBitSet(mask);
+
+ mask &= ~(1 << reg);
+
+ if (mask != 0)
+ {
+ int reg2 = BitUtils.HighestBitSet(mask);
+
+ mask &= ~(1 << reg2);
+
+ offset -= type.GetSizeInBytes() * 2;
+
+ if (offset != 0)
+ {
+ context.Assembler.LdpRiUn(Register(reg2, type), Register(reg, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.LdpRiPost(Register(reg2, type), Register(reg, type), Register(SpRegister), calleeSaveRegionSize);
+ }
+ }
+ else
+ {
+ offset -= type.GetSizeInBytes();
+
+ if (offset != 0)
+ {
+ context.Assembler.LdrRiUn(Register(reg, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.LdrRiPost(Register(reg, type), Register(SpRegister), calleeSaveRegionSize);
+ }
+ }
+ }
+ }
+
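+ // Materializes a 64-bit constant: zero copies XZR, bitmask-encodable
+ // values use a single ORR immediate, and everything else is built from
+ // MOVZ for the first nonzero 16-bit chunk plus MOVK for the rest.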
+ private static void GenerateConstantCopy(CodeGenContext context, Operand dest, ulong value)
+ {
+ if (value == 0)
+ {
+ context.Assembler.Mov(dest, Register(ZrRegister, dest.Type));
+ }
+ else if (CodeGenCommon.TryEncodeBitMask(dest.Type, value, out _, out _, out _))
+ {
+ context.Assembler.Orr(dest, Register(ZrRegister, dest.Type), Const(dest.Type, (long)value));
+ }
+ else
+ {
+ int hw = 0;
+ bool first = true;
+
+ while (value != 0)
+ {
+ int valueLow = (ushort)value;
+ if (valueLow != 0)
+ {
+ if (first)
+ {
+ context.Assembler.Movz(dest, valueLow, hw);
+ first = false;
+ }
+ else
+ {
+ context.Assembler.Movk(dest, valueLow, hw);
+ }
+ }
+
+ hw++;
+ value >>= 16;
+ }
+ }
+ }
+
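+ // Compare-and-swap built on a load/store-exclusive loop: LDAXR acquires,
+ // the value is compared with the expected one, and STLXR releases; CBNZ
+ // retries when the exclusive store fails, and the trailing CLREX clears
+ // any still-open exclusive monitor.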
+ private static void GenerateAtomicCas(
+ CodeGenContext context,
+ Operand address,
+ Operand expected,
+ Operand desired,
+ Operand actual,
+ Operand result,
+ AccessSize accessSize)
+ {
+ int startOffset = context.StreamOffset;
+
+ switch (accessSize)
+ {
+ case AccessSize.Byte:
+ context.Assembler.Ldaxrb(actual, address);
+ break;
+ case AccessSize.Hword:
+ context.Assembler.Ldaxrh(actual, address);
+ break;
+ default:
+ context.Assembler.Ldaxr(actual, address);
+ break;
+ }
+
+ context.Assembler.Cmp(actual, expected);
+
+ context.JumpToNear(ArmCondition.Ne);
+
+ switch (accessSize)
+ {
+ case AccessSize.Byte:
+ context.Assembler.Stlxrb(desired, address, result);
+ break;
+ case AccessSize.Hword:
+ context.Assembler.Stlxrh(desired, address, result);
+ break;
+ default:
+ context.Assembler.Stlxr(desired, address, result);
+ break;
+ }
+
+ context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed.
+
+ context.JumpHere();
+
+ context.Assembler.Clrex();
+ }
+
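+ // 128-bit compare-and-swap: LDAXP/STLXP operate on a register pair, and
+ // the two EORs plus ORR fold the 128-bit comparison into a single
+ // zero-or-nonzero value for the branch.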
+ private static void GenerateAtomicDcas(
+ CodeGenContext context,
+ Operand address,
+ Operand expectedLow,
+ Operand expectedHigh,
+ Operand desiredLow,
+ Operand desiredHigh,
+ Operand actualLow,
+ Operand actualHigh,
+ Operand temp0,
+ Operand temp1)
+ {
+ int startOffset = context.StreamOffset;
+
+ context.Assembler.Ldaxp(actualLow, actualHigh, address);
+ context.Assembler.Eor(temp0, actualHigh, expectedHigh);
+ context.Assembler.Eor(temp1, actualLow, expectedLow);
+ context.Assembler.Orr(temp0, temp1, temp0);
+
+ context.JumpToNearIfNotZero(temp0);
+
+ Operand result = Register(temp0, OperandType.I32);
+
+ context.Assembler.Stlxp(desiredLow, desiredHigh, address, result);
+ context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed.
+
+ context.JumpHere();
+
+ context.Assembler.Clrex();
+ }
+
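+ // Peephole: fuses two adjacent loads or stores into a single LDP/STP when
+ // they share a base register, have the same value type, touch consecutive
+ // offsets, and the first offset fits the scaled 7-bit signed immediate.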
+ private static bool TryPairMemoryOp(CodeGenContext context, Operation currentOp, Operation nextOp)
+ {
+ if (!TryGetMemOpBaseAndOffset(currentOp, out Operand op1Base, out int op1Offset))
+ {
+ return false;
+ }
+
+ if (!TryGetMemOpBaseAndOffset(nextOp, out Operand op2Base, out int op2Offset))
+ {
+ return false;
+ }
+
+ if (op1Base != op2Base)
+ {
+ return false;
+ }
+
+ OperandType valueType = GetMemOpValueType(currentOp);
+
+ if (valueType != GetMemOpValueType(nextOp) || op1Offset + valueType.GetSizeInBytes() != op2Offset)
+ {
+ return false;
+ }
+
+ if (!CodeGenCommon.ConstFitsOnSImm7(op1Offset, valueType.GetSizeInBytesLog2()))
+ {
+ return false;
+ }
+
+ if (currentOp.Instruction == Instruction.Load)
+ {
+ context.Assembler.LdpRiUn(currentOp.Destination, nextOp.Destination, op1Base, op1Offset);
+ }
+ else if (currentOp.Instruction == Instruction.Store)
+ {
+ context.Assembler.StpRiUn(currentOp.GetSource(1), nextOp.GetSource(1), op1Base, op1Offset);
+ }
+ else
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ private static bool IsLoadOrStore(Operation operation)
+ {
+ return operation.Instruction == Instruction.Load || operation.Instruction == Instruction.Store;
+ }
+
+ private static OperandType GetMemOpValueType(Operation operation)
+ {
+ if (operation.Destination != default)
+ {
+ return operation.Destination.Type;
+ }
+
+ return operation.GetSource(1).Type;
+ }
+
+ private static bool TryGetMemOpBaseAndOffset(Operation operation, out Operand baseAddress, out int offset)
+ {
+ baseAddress = default;
+ offset = 0;
+ Operand address = operation.GetSource(0);
+
+ if (address.Kind != OperandKind.Memory)
+ {
+ return false;
+ }
+
+ MemoryOperand memOp = address.GetMemory();
+ Operand baseOp = memOp.BaseAddress;
+
+ if (baseOp == default)
+ {
+ baseOp = memOp.Index;
+
+ if (baseOp == default || memOp.Scale != Multiplier.x1)
+ {
+ return false;
+ }
+ }
+ else if (memOp.Index != default)
+ {
+ return false;
+ }
+
+ baseAddress = baseOp;
+ offset = memOp.Displacement;
+
+ return true;
+ }
+
+ private static Operand Register(Operand operand, OperandType type = OperandType.I64)
+ {
+ return Register(operand.GetRegister().Index, type);
+ }
+
+ private static Operand Register(int register, OperandType type = OperandType.I64)
+ {
+ return Factory.Register(register, RegisterType.Integer, type);
+ }
+
+ private static int Align16(int value)
+ {
+ return (value + 0xf) & ~0xf;
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateUnOp(Operand dest, Operand source)
+ {
+ // Destination and source aren't forced to be equal
+ // EnsureSameReg (dest, source);
+ EnsureSameType(dest, source);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+ {
+ // Destination and source aren't forced to be equal
+ // EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1, src2);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+ {
+ // Destination and source aren't forced to be equal
+ // EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1);
+
+ Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
+ }
+
+ private static void EnsureSameReg(Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+ Debug.Assert(op1.Kind == op2.Kind);
+ Debug.Assert(op1.Value == op2.Value);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ }
+
+#pragma warning disable IDE0051 // Remove unused private member
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ Debug.Assert(op1.Type == op4.Type);
+ }
+#pragma warning restore IDE0051
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
new file mode 100644
index 0000000..b873705
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
@@ -0,0 +1,691 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CodeGeneratorIntrinsic
+ {
+ public static void GenerateOperation(CodeGenContext context, Operation operation)
+ {
+ Intrinsic intrin = operation.Intrinsic;
+
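+ // The vector type and size ride along in the intrinsic value itself;
+ // mask them off for the table lookup, then shift them back out per case
+ // to build the Q and size fields of the instruction.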
+ IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
+ switch (info.Type)
+ {
+ case IntrinsicType.ScalarUnary:
+ GenerateVectorUnary(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarUnaryByElem:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorUnaryByElem(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(1).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarBinary:
+ GenerateVectorBinary(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarBinaryFPByElem:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(2).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarBinaryRd:
+ GenerateVectorUnary(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarBinaryShl:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarBinaryShr:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarFPCompare:
+ GenerateScalarFPCompare(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarFPConvFixed:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ 0,
+ ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarFPConvFixedGpr:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateScalarFPConvGpr(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarFPConvGpr:
+ GenerateScalarFPConvGpr(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarTernary:
+ GenerateScalarTernary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2),
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarTernaryFPRdByElem:
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.ScalarTernaryShlRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+ case IntrinsicType.ScalarTernaryShrRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+
+ case IntrinsicType.Vector128Unary:
+ GenerateVectorUnary(
+ context,
+ 1,
+ 0,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.Vector128Binary:
+ GenerateVectorBinary(
+ context,
+ 1,
+ 0,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.Vector128BinaryRd:
+ GenerateVectorUnary(
+ context,
+ 1,
+ 0,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1));
+ break;
+
+ case IntrinsicType.VectorUnary:
+ GenerateVectorUnary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.VectorUnaryByElem:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorUnaryByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(1).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.VectorBinary:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryBitwise:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryByElem:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(2).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryFPByElem:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(2).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryRd:
+ GenerateVectorUnary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryShl:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.VectorBinaryShr:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.VectorFPConvFixed:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.VectorInsertByElem:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorInsertByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ (uint)operation.GetSource(1).AsInt32(),
+ operation.Destination,
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorLookupTable:
+ Debug.Assert((uint)(operation.SourcesCount - 2) <= 3);
+
+ for (int i = 1; i < operation.SourcesCount - 1; i++)
+ {
+ Register currReg = operation.GetSource(i).GetRegister();
+ Register prevReg = operation.GetSource(i - 1).GetRegister();
+
+ Debug.Assert(prevReg.Index + 1 == currReg.Index && currReg.Type == RegisterType.Vector);
+ }
+
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ info.Inst | ((uint)(operation.SourcesCount - 2) << 13),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(operation.SourcesCount - 1));
+ break;
+ case IntrinsicType.VectorTernaryFPRdByElem:
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryRd:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryRdBitwise:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryRdByElem:
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryShlRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+ case IntrinsicType.VectorTernaryShrRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+
+ case IntrinsicType.GetRegister:
+ context.Assembler.WriteInstruction(info.Inst, operation.Destination);
+ break;
+ case IntrinsicType.SetRegister:
+ context.Assembler.WriteInstruction(info.Inst, operation.GetSource(0));
+ break;
+
+ default:
+ throw new NotImplementedException(info.Type.ToString());
+ }
+ }
+
+ private static void GenerateScalarFPCompare(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand dest,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (sz << 22);
+
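+ // FCMP against #0.0 has its own encoding; bit 3 selects it, and rm is
+ // re-pointed at rn only to satisfy the operand writer.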
+ if (rm.Kind == OperandKind.Constant && rm.Value == 0)
+ {
+ instruction |= 0b1000;
+ rm = rn;
+ }
+
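+ // The compare only updates PSTATE; MRS then copies the NZCV flags
+ // (op1=3, CRn=4, CRm=2, op2=0) into the destination register.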
+ context.Assembler.WriteInstructionRm16NoRet(instruction, rn, rm);
+ context.Assembler.Mrs(dest, 1, 3, 4, 2, 0);
+ }
+
+ private static void GenerateScalarFPConvGpr(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn)
+ {
+ instruction |= (sz << 22);
+
+ if (rd.Type.IsInteger())
+ {
+ context.Assembler.WriteInstructionAuto(instruction, rd, rn);
+ }
+ else
+ {
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= Assembler.SfFlag;
+ }
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+ }
+
+ private static void GenerateScalarFPConvGpr(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ uint fBits)
+ {
+ Debug.Assert(fBits <= 64);
+
+ instruction |= (sz << 22);
+ instruction |= (64 - fBits) << 10;
+
+ if (rd.Type.IsInteger())
+ {
+ Debug.Assert(rd.Type != OperandType.I32 || fBits <= 32);
+
+ context.Assembler.WriteInstructionAuto(instruction, rd, rn);
+ }
+ else
+ {
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= Assembler.SfFlag;
+ }
+ else
+ {
+ Debug.Assert(fBits <= 32);
+ }
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+ }
+
+ private static void GenerateScalarTernary(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ Operand ra)
+ {
+ instruction |= (sz << 22);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn, rm, ra);
+ }
+
+ private static void GenerateVectorUnary(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn)
+ {
+ instruction |= (q << 30) | (sz << 22);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
+ private static void GenerateVectorUnaryByElem(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ uint srcIndex,
+ Operand rd,
+ Operand rn)
+ {
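+ // imm5 as in DUP (element): the lowest set bit selects the element size
+ // and the bits above it carry the source index.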
+ uint imm5 = (srcIndex << ((int)sz + 1)) | (1u << (int)sz);
+
+ instruction |= (q << 30) | (imm5 << 16);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
+ private static void GenerateVectorBinary(
+ CodeGenContext context,
+ uint q,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (q << 30);
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private static void GenerateVectorBinary(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (q << 30) | (sz << 22);
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private static void GenerateVectorBinaryByElem(
+ CodeGenContext context,
+ uint q,
+ uint size,
+ uint instruction,
+ uint srcIndex,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
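+ // The element index splits across the H:L(:M) bits; where each bit lands
+ // depends on the element size, per the by-element encoding.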
+ instruction |= (q << 30) | (size << 22);
+
+ if (size == 2)
+ {
+ instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
+ }
+ else
+ {
+ instruction |= ((srcIndex & 3) << 20) | ((srcIndex & 4) << 9);
+ }
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private static void GenerateVectorBinaryFPByElem(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ uint srcIndex,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (q << 30) | (sz << 22);
+
+ if (sz != 0)
+ {
+ instruction |= (srcIndex & 1) << 11;
+ }
+ else
+ {
+ instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
+ }
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
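+ // Shift-left immediates encode as immh:immb = esize + shift; the
+ // (8u << sz) bit marks the element size and the low bits carry the shift.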
+ private static void GenerateVectorBinaryShlImm(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ uint shift)
+ {
+ instruction |= (q << 30);
+
+ Debug.Assert(shift < (8u << (int)sz)); // shift is unsigned, so only the upper bound needs checking.
+
+ uint imm = (8u << (int)sz) | (shift & (0x3fu >> (int)(3 - sz)));
+
+ instruction |= (imm << 16);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
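+ // Right shifts encode as immh:immb = (2 * esize) - shift, so the marker
+ // bit (8u << sz) doubles as the element-size selector.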
+ private static void GenerateVectorBinaryShrImm(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ uint shift)
+ {
+ instruction |= (q << 30);
+
+ Debug.Assert(shift > 0 && shift <= (8u << (int)sz));
+
+ uint imm = (8u << (int)sz) | ((8u << (int)sz) - shift);
+
+ instruction |= (imm << 16);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
+ private static void GenerateVectorInsertByElem(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ uint srcIndex,
+ uint dstIndex,
+ Operand rd,
+ Operand rn)
+ {
+ uint imm4 = srcIndex << (int)sz;
+ uint imm5 = (dstIndex << ((int)sz + 1)) | (1u << (int)sz);
+
+ instruction |= imm4 << 11;
+ instruction |= imm5 << 16;
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs b/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
new file mode 100644
index 0000000..86afc2b
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
@@ -0,0 +1,182 @@
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Versioning;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static partial class HardwareCapabilities
+ {
+ static HardwareCapabilities()
+ {
+ if (!ArmBase.Arm64.IsSupported)
+ {
+ return;
+ }
+
+ if (OperatingSystem.IsLinux())
+ {
+ LinuxFeatureInfoHwCap = (LinuxFeatureFlagsHwCap)getauxval(AT_HWCAP);
+ LinuxFeatureInfoHwCap2 = (LinuxFeatureFlagsHwCap2)getauxval(AT_HWCAP2);
+ }
+
+ if (OperatingSystem.IsMacOS())
+ {
+ for (int i = 0; i < _sysctlNames.Length; i++)
+ {
+ if (CheckSysctlName(_sysctlNames[i]))
+ {
+ MacOsFeatureInfo |= (MacOsFeatureFlags)(1 << i);
+ }
+ }
+ }
+ }
+
+ #region Linux
+
+ private const ulong AT_HWCAP = 16;
+ private const ulong AT_HWCAP2 = 26;
+
+ [LibraryImport("libc", SetLastError = true)]
+ private static partial ulong getauxval(ulong type);
+
+ [Flags]
+ public enum LinuxFeatureFlagsHwCap : ulong
+ {
+ Fp = 1 << 0,
+ Asimd = 1 << 1,
+ Evtstrm = 1 << 2,
+ Aes = 1 << 3,
+ Pmull = 1 << 4,
+ Sha1 = 1 << 5,
+ Sha2 = 1 << 6,
+ Crc32 = 1 << 7,
+ Atomics = 1 << 8,
+ FpHp = 1 << 9,
+ AsimdHp = 1 << 10,
+ CpuId = 1 << 11,
+ AsimdRdm = 1 << 12,
+ Jscvt = 1 << 13,
+ Fcma = 1 << 14,
+ Lrcpc = 1 << 15,
+ DcpOp = 1 << 16,
+ Sha3 = 1 << 17,
+ Sm3 = 1 << 18,
+ Sm4 = 1 << 19,
+ AsimdDp = 1 << 20,
+ Sha512 = 1 << 21,
+ Sve = 1 << 22,
+ AsimdFhm = 1 << 23,
+ Dit = 1 << 24,
+ Uscat = 1 << 25,
+ Ilrcpc = 1 << 26,
+ FlagM = 1 << 27,
+ Ssbs = 1 << 28,
+ Sb = 1 << 29,
+ Paca = 1 << 30,
+ Pacg = 1UL << 31,
+ }
+
+ [Flags]
+ public enum LinuxFeatureFlagsHwCap2 : ulong
+ {
+ Dcpodp = 1 << 0,
+ Sve2 = 1 << 1,
+ SveAes = 1 << 2,
+ SvePmull = 1 << 3,
+ SveBitperm = 1 << 4,
+ SveSha3 = 1 << 5,
+ SveSm4 = 1 << 6,
+ FlagM2 = 1 << 7,
+ Frint = 1 << 8,
+ SveI8mm = 1 << 9,
+ SveF32mm = 1 << 10,
+ SveF64mm = 1 << 11,
+ SveBf16 = 1 << 12,
+ I8mm = 1 << 13,
+ Bf16 = 1 << 14,
+ Dgh = 1 << 15,
+ Rng = 1 << 16,
+ Bti = 1 << 17,
+ Mte = 1 << 18,
+ Ecv = 1 << 19,
+ Afp = 1 << 20,
+ Rpres = 1 << 21,
+ Mte3 = 1 << 22,
+ Sme = 1 << 23,
+ Sme_i16i64 = 1 << 24,
+ Sme_f64f64 = 1 << 25,
+ Sme_i8i32 = 1 << 26,
+ Sme_f16f32 = 1 << 27,
+ Sme_b16f32 = 1 << 28,
+ Sme_f32f32 = 1 << 29,
+ Sme_fa64 = 1 << 30,
+ Wfxt = 1UL << 31,
+ Ebf16 = 1UL << 32,
+ Sve_Ebf16 = 1UL << 33,
+ Cssc = 1UL << 34,
+ Rprfm = 1UL << 35,
+ Sve2p1 = 1UL << 36,
+ }
+
+ public static LinuxFeatureFlagsHwCap LinuxFeatureInfoHwCap { get; } = 0;
+ public static LinuxFeatureFlagsHwCap2 LinuxFeatureInfoHwCap2 { get; } = 0;
+
+ #endregion
+
+ #region macOS
+
+ [LibraryImport("libSystem.dylib", SetLastError = true)]
+ private static unsafe partial int sysctlbyname([MarshalAs(UnmanagedType.LPStr)] string name, out int oldValue, ref ulong oldSize, IntPtr newValue, ulong newValueSize);
+
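+ // sysctlbyname returns 0 on success; the feature keys below all yield a
+ // 4-byte int that is nonzero when the capability is present.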
+ [SupportedOSPlatform("macos")]
+ private static bool CheckSysctlName(string name)
+ {
+ ulong size = sizeof(int);
+ if (sysctlbyname(name, out int val, ref size, IntPtr.Zero, 0) == 0 && size == sizeof(int))
+ {
+ return val != 0;
+ }
+ return false;
+ }
+
+ private static readonly string[] _sysctlNames = new string[]
+ {
+ "hw.optional.floatingpoint",
+ "hw.optional.AdvSIMD",
+ "hw.optional.arm.FEAT_FP16",
+ "hw.optional.arm.FEAT_AES",
+ "hw.optional.arm.FEAT_PMULL",
+ "hw.optional.arm.FEAT_LSE",
+ "hw.optional.armv8_crc32",
+ "hw.optional.arm.FEAT_SHA1",
+ "hw.optional.arm.FEAT_SHA256",
+ };
+
+ [Flags]
+ public enum MacOsFeatureFlags
+ {
+ Fp = 1 << 0,
+ AdvSimd = 1 << 1,
+ Fp16 = 1 << 2,
+ Aes = 1 << 3,
+ Pmull = 1 << 4,
+ Lse = 1 << 5,
+ Crc32 = 1 << 6,
+ Sha1 = 1 << 7,
+ Sha256 = 1 << 8,
+ }
+
+ public static MacOsFeatureFlags MacOsFeatureInfo { get; } = 0;
+
+ #endregion
+
+ public static bool SupportsAdvSimd => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Asimd) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.AdvSimd);
+ public static bool SupportsAes => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Aes) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Aes);
+ public static bool SupportsPmull => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Pmull) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Pmull);
+ public static bool SupportsLse => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Atomics) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Lse);
+ public static bool SupportsCrc32 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Crc32) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Crc32);
+ public static bool SupportsSha1 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha1) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha1);
+ public static bool SupportsSha256 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha2) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha256);
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
new file mode 100644
index 0000000..956fc77
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+ readonly struct IntrinsicInfo
+ {
+ public uint Inst { get; }
+ public IntrinsicType Type { get; }
+
+ public IntrinsicInfo(uint inst, IntrinsicType type)
+ {
+ Inst = inst;
+ Type = type;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
new file mode 100644
index 0000000..dbd5bdd
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
@@ -0,0 +1,465 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class IntrinsicTable
+ {
+ private static readonly IntrinsicInfo[] _intrinTable;
+
+ static IntrinsicTable()
+ {
+ _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
+
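+ // Each entry pairs a base opcode (Q, size and register fields zeroed)
+ // with an emit strategy; CodeGeneratorIntrinsic fills in the variable
+ // fields before writing the instruction word.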
+#pragma warning disable IDE0055 // Disable formatting
+ Add(Intrinsic.Arm64AbsS, new IntrinsicInfo(0x5e20b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64AbsV, new IntrinsicInfo(0x0e20b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64AddhnV, new IntrinsicInfo(0x0e204000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64AddpS, new IntrinsicInfo(0x5e31b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64AddpV, new IntrinsicInfo(0x0e20bc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128BinaryRd));
+ Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128BinaryRd));
+ Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64BicVi, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorBinaryBitwiseImm));
+ Add(Intrinsic.Arm64BicV, new IntrinsicInfo(0x0e601c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64BifV, new IntrinsicInfo(0x2ee01c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64BitV, new IntrinsicInfo(0x2ea01c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64BslV, new IntrinsicInfo(0x2e601c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64ClsV, new IntrinsicInfo(0x0e204800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ClzV, new IntrinsicInfo(0x2e204800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmeqS, new IntrinsicInfo(0x7e208c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmeqV, new IntrinsicInfo(0x2e208c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmeqSz, new IntrinsicInfo(0x5e209800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmeqVz, new IntrinsicInfo(0x0e209800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmgeS, new IntrinsicInfo(0x5e203c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmgeV, new IntrinsicInfo(0x0e203c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmgeSz, new IntrinsicInfo(0x7e208800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmgeVz, new IntrinsicInfo(0x2e208800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmgtS, new IntrinsicInfo(0x5e203400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmgtV, new IntrinsicInfo(0x0e203400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmgtSz, new IntrinsicInfo(0x5e208800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmgtVz, new IntrinsicInfo(0x0e208800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmhiS, new IntrinsicInfo(0x7e203400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmhiV, new IntrinsicInfo(0x2e203400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmhsS, new IntrinsicInfo(0x7e203c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmhsV, new IntrinsicInfo(0x2e203c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmleSz, new IntrinsicInfo(0x7e209800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmleVz, new IntrinsicInfo(0x2e209800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmltSz, new IntrinsicInfo(0x5e20a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmltVz, new IntrinsicInfo(0x0e20a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmtstS, new IntrinsicInfo(0x5e208c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmtstV, new IntrinsicInfo(0x0e208c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CntV, new IntrinsicInfo(0x0e205800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64DupSe, new IntrinsicInfo(0x5e000400u, IntrinsicType.ScalarUnaryByElem));
+ Add(Intrinsic.Arm64DupVe, new IntrinsicInfo(0x0e000400u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64DupGp, new IntrinsicInfo(0x0e000c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64EorV, new IntrinsicInfo(0x2e201c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64ExtV, new IntrinsicInfo(0x2e000000u, IntrinsicType.VectorExt));
+ Add(Intrinsic.Arm64FabdS, new IntrinsicInfo(0x7ea0d400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FabdV, new IntrinsicInfo(0x2ea0d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FabsV, new IntrinsicInfo(0x0ea0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FabsS, new IntrinsicInfo(0x1e20c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FacgeS, new IntrinsicInfo(0x7e20ec00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FacgeV, new IntrinsicInfo(0x2e20ec00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FacgtS, new IntrinsicInfo(0x7ea0ec00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FacgtV, new IntrinsicInfo(0x2ea0ec00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddpS, new IntrinsicInfo(0x7e30d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FaddpV, new IntrinsicInfo(0x2e20d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddV, new IntrinsicInfo(0x0e20d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddS, new IntrinsicInfo(0x1e202800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FccmpeS, new IntrinsicInfo(0x1e200410u, IntrinsicType.ScalarFPCompareCond));
+ Add(Intrinsic.Arm64FccmpS, new IntrinsicInfo(0x1e200400u, IntrinsicType.ScalarFPCompareCond));
+ Add(Intrinsic.Arm64FcmeqS, new IntrinsicInfo(0x5e20e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmeqV, new IntrinsicInfo(0x0e20e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmeqSz, new IntrinsicInfo(0x5ea0d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmeqVz, new IntrinsicInfo(0x0ea0d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmgeS, new IntrinsicInfo(0x7e20e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmgeV, new IntrinsicInfo(0x2e20e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmgeSz, new IntrinsicInfo(0x7ea0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmgeVz, new IntrinsicInfo(0x2ea0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmgtS, new IntrinsicInfo(0x7ea0e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmgtV, new IntrinsicInfo(0x2ea0e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmgtSz, new IntrinsicInfo(0x5ea0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmgtVz, new IntrinsicInfo(0x0ea0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmleSz, new IntrinsicInfo(0x7ea0d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmleVz, new IntrinsicInfo(0x2ea0d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmltSz, new IntrinsicInfo(0x5ea0e800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmltVz, new IntrinsicInfo(0x0ea0e800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmpeS, new IntrinsicInfo(0x1e202010u, IntrinsicType.ScalarFPCompare));
+ Add(Intrinsic.Arm64FcmpS, new IntrinsicInfo(0x1e202000u, IntrinsicType.ScalarFPCompare));
+ Add(Intrinsic.Arm64FcselS, new IntrinsicInfo(0x1e200c00u, IntrinsicType.ScalarFcsel));
+ Add(Intrinsic.Arm64FcvtasS, new IntrinsicInfo(0x5e21c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtasV, new IntrinsicInfo(0x0e21c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtasGp, new IntrinsicInfo(0x1e240000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtauS, new IntrinsicInfo(0x7e21c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtauV, new IntrinsicInfo(0x2e21c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtauGp, new IntrinsicInfo(0x1e250000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtlV, new IntrinsicInfo(0x0e217800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmsS, new IntrinsicInfo(0x5e21b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtmsV, new IntrinsicInfo(0x0e21b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmsGp, new IntrinsicInfo(0x1e300000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtmuS, new IntrinsicInfo(0x7e21b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtmuV, new IntrinsicInfo(0x2e21b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmuGp, new IntrinsicInfo(0x1e310000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnsS, new IntrinsicInfo(0x5e21a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtnsV, new IntrinsicInfo(0x0e21a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtnsGp, new IntrinsicInfo(0x1e200000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnuS, new IntrinsicInfo(0x7e21a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtnuV, new IntrinsicInfo(0x2e21a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtnuGp, new IntrinsicInfo(0x1e210000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnV, new IntrinsicInfo(0x0e216800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64FcvtpsS, new IntrinsicInfo(0x5ea1a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtpsV, new IntrinsicInfo(0x0ea1a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtpsGp, new IntrinsicInfo(0x1e280000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtpuS, new IntrinsicInfo(0x7ea1a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtpuV, new IntrinsicInfo(0x2ea1a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtpuGp, new IntrinsicInfo(0x1e290000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtxnS, new IntrinsicInfo(0x7e216800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtxnV, new IntrinsicInfo(0x2e216800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzsSFixed, new IntrinsicInfo(0x5f00fc00u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzsVFixed, new IntrinsicInfo(0x0f00fc00u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzsS, new IntrinsicInfo(0x5ea1b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtzsV, new IntrinsicInfo(0x0ea1b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzsGpFixed, new IntrinsicInfo(0x1e180000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64FcvtzsGp, new IntrinsicInfo(0x1e380000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtzuSFixed, new IntrinsicInfo(0x7f00fc00u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzuVFixed, new IntrinsicInfo(0x2f00fc00u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzuS, new IntrinsicInfo(0x7ea1b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtzuV, new IntrinsicInfo(0x2ea1b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzuGpFixed, new IntrinsicInfo(0x1e190000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64FcvtzuGp, new IntrinsicInfo(0x1e390000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtS, new IntrinsicInfo(0x1e224000u, IntrinsicType.ScalarFPConv));
+ Add(Intrinsic.Arm64FdivV, new IntrinsicInfo(0x2e20fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FdivS, new IntrinsicInfo(0x1e201800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmaddS, new IntrinsicInfo(0x1f000000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FmaxnmpS, new IntrinsicInfo(0x7e30c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmaxnmpV, new IntrinsicInfo(0x2e20c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxnmvV, new IntrinsicInfo(0x2e30c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FmaxnmV, new IntrinsicInfo(0x0e20c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxnmS, new IntrinsicInfo(0x1e206800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmaxpS, new IntrinsicInfo(0x7e30f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmaxpV, new IntrinsicInfo(0x2e20f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxvV, new IntrinsicInfo(0x2e30f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FmaxV, new IntrinsicInfo(0x0e20f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxS, new IntrinsicInfo(0x1e204800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FminnmpS, new IntrinsicInfo(0x7eb0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FminnmpV, new IntrinsicInfo(0x2ea0c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminnmvV, new IntrinsicInfo(0x2eb0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FminnmV, new IntrinsicInfo(0x0ea0c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminnmS, new IntrinsicInfo(0x1e207800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FminpS, new IntrinsicInfo(0x7eb0f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FminpV, new IntrinsicInfo(0x2ea0f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminvV, new IntrinsicInfo(0x2eb0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FminV, new IntrinsicInfo(0x0ea0f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminS, new IntrinsicInfo(0x1e205800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmlaSe, new IntrinsicInfo(0x5f801000u, IntrinsicType.ScalarTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlaVe, new IntrinsicInfo(0x0f801000u, IntrinsicType.VectorTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlaV, new IntrinsicInfo(0x0e20cc00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64FmlsSe, new IntrinsicInfo(0x5f805000u, IntrinsicType.ScalarTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlsVe, new IntrinsicInfo(0x0f805000u, IntrinsicType.VectorTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlsV, new IntrinsicInfo(0x0ea0cc00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64FmovVi, new IntrinsicInfo(0x0f00f400u, IntrinsicType.VectorFmovi));
+ Add(Intrinsic.Arm64FmovS, new IntrinsicInfo(0x1e204000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmovGp, new IntrinsicInfo(0x1e260000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FmovSi, new IntrinsicInfo(0x1e201000u, IntrinsicType.ScalarFmovi));
+ Add(Intrinsic.Arm64FmsubS, new IntrinsicInfo(0x1f008000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FmulxSe, new IntrinsicInfo(0x7f809000u, IntrinsicType.ScalarBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulxVe, new IntrinsicInfo(0x2f809000u, IntrinsicType.VectorBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulxS, new IntrinsicInfo(0x5e20dc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmulxV, new IntrinsicInfo(0x0e20dc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmulSe, new IntrinsicInfo(0x5f809000u, IntrinsicType.ScalarBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulVe, new IntrinsicInfo(0x0f809000u, IntrinsicType.VectorBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulV, new IntrinsicInfo(0x2e20dc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmulS, new IntrinsicInfo(0x1e200800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FnegV, new IntrinsicInfo(0x2ea0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FnegS, new IntrinsicInfo(0x1e214000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FnmaddS, new IntrinsicInfo(0x1f200000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FnmsubS, new IntrinsicInfo(0x1f208000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FnmulS, new IntrinsicInfo(0x1e208800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrecpeS, new IntrinsicInfo(0x5ea1d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrecpeV, new IntrinsicInfo(0x0ea1d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrecpsS, new IntrinsicInfo(0x5e20fc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrecpsV, new IntrinsicInfo(0x0e20fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FrecpxS, new IntrinsicInfo(0x5ea1f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintaV, new IntrinsicInfo(0x2e218800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintaS, new IntrinsicInfo(0x1e264000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintiV, new IntrinsicInfo(0x2ea19800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintiS, new IntrinsicInfo(0x1e27c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintmV, new IntrinsicInfo(0x0e219800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintmS, new IntrinsicInfo(0x1e254000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintnV, new IntrinsicInfo(0x0e218800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintnS, new IntrinsicInfo(0x1e244000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintpV, new IntrinsicInfo(0x0ea18800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintpS, new IntrinsicInfo(0x1e24c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintxV, new IntrinsicInfo(0x2e219800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintxS, new IntrinsicInfo(0x1e274000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintzV, new IntrinsicInfo(0x0ea19800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintzS, new IntrinsicInfo(0x1e25c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrsqrteS, new IntrinsicInfo(0x7ea1d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrsqrteV, new IntrinsicInfo(0x2ea1d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrsqrtsS, new IntrinsicInfo(0x5ea0fc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrsqrtsV, new IntrinsicInfo(0x0ea0fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FsqrtV, new IntrinsicInfo(0x2ea1f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FsqrtS, new IntrinsicInfo(0x1e21c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FsubV, new IntrinsicInfo(0x0ea0d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FsubS, new IntrinsicInfo(0x1e203800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64InsVe, new IntrinsicInfo(0x6e000400u, IntrinsicType.VectorInsertByElem));
+ Add(Intrinsic.Arm64InsGp, new IntrinsicInfo(0x4e001c00u, IntrinsicType.ScalarUnaryByElem));
+ Add(Intrinsic.Arm64Ld1rV, new IntrinsicInfo(0x0d40c000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld1Vms, new IntrinsicInfo(0x0c402000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld1Vss, new IntrinsicInfo(0x0d400000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld2rV, new IntrinsicInfo(0x0d60c000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld2Vms, new IntrinsicInfo(0x0c408000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld2Vss, new IntrinsicInfo(0x0d600000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld3rV, new IntrinsicInfo(0x0d40e000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld3Vms, new IntrinsicInfo(0x0c404000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld3Vss, new IntrinsicInfo(0x0d402000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld4rV, new IntrinsicInfo(0x0d60e000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld4Vms, new IntrinsicInfo(0x0c400000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld4Vss, new IntrinsicInfo(0x0d602000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64MlaVe, new IntrinsicInfo(0x2f000000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64MlaV, new IntrinsicInfo(0x0e209400u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi));
+ Add(Intrinsic.Arm64MrsFpcr, new IntrinsicInfo(0xd53b4400u, IntrinsicType.GetRegister));
+ Add(Intrinsic.Arm64MsrFpcr, new IntrinsicInfo(0xd51b4400u, IntrinsicType.SetRegister));
+ Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister));
+ Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister));
+ Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64MulV, new IntrinsicInfo(0x0e209c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64MvniV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorMvni));
+ Add(Intrinsic.Arm64NegS, new IntrinsicInfo(0x7e20b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64NegV, new IntrinsicInfo(0x2e20b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64NotV, new IntrinsicInfo(0x2e205800u, IntrinsicType.VectorUnaryBitwise));
+ Add(Intrinsic.Arm64OrnV, new IntrinsicInfo(0x0ee01c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64OrrVi, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorBinaryBitwiseImm));
+ Add(Intrinsic.Arm64OrrV, new IntrinsicInfo(0x0ea01c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64PmullV, new IntrinsicInfo(0x0e20e000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64PmulV, new IntrinsicInfo(0x2e209c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64RaddhnV, new IntrinsicInfo(0x2e204000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64RbitV, new IntrinsicInfo(0x2e605800u, IntrinsicType.VectorUnaryBitwise));
+ Add(Intrinsic.Arm64Rev16V, new IntrinsicInfo(0x0e201800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Rev32V, new IntrinsicInfo(0x2e200800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Rev64V, new IntrinsicInfo(0x0e200800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64RshrnV, new IntrinsicInfo(0x0f008c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64RsubhnV, new IntrinsicInfo(0x2e206000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabalV, new IntrinsicInfo(0x0e205000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabaV, new IntrinsicInfo(0x0e207c00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabdlV, new IntrinsicInfo(0x0e207000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SabdV, new IntrinsicInfo(0x0e207400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SadalpV, new IntrinsicInfo(0x0e206800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SaddlpV, new IntrinsicInfo(0x0e202800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SaddlvV, new IntrinsicInfo(0x0e303800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SaddlV, new IntrinsicInfo(0x0e200000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SaddwV, new IntrinsicInfo(0x0e201000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64ScvtfSFixed, new IntrinsicInfo(0x5f00e400u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64ScvtfVFixed, new IntrinsicInfo(0x0f00e400u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64ScvtfS, new IntrinsicInfo(0x5e21d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64ScvtfV, new IntrinsicInfo(0x0e21d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ScvtfGpFixed, new IntrinsicInfo(0x1e020000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64ScvtfGp, new IntrinsicInfo(0x1e220000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64Sha1cV, new IntrinsicInfo(0x5e000000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1hV, new IntrinsicInfo(0x5e280800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha1mV, new IntrinsicInfo(0x5e002000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1pV, new IntrinsicInfo(0x5e001000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1su0V, new IntrinsicInfo(0x5e003000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1su1V, new IntrinsicInfo(0x5e281800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha256h2V, new IntrinsicInfo(0x5e005000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha256hV, new IntrinsicInfo(0x5e004000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha256su0V, new IntrinsicInfo(0x5e282800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha256su1V, new IntrinsicInfo(0x5e006000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64ShaddV, new IntrinsicInfo(0x0e200400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64ShllV, new IntrinsicInfo(0x2e213800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ShlS, new IntrinsicInfo(0x5f005400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64ShlV, new IntrinsicInfo(0x0f005400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64ShrnV, new IntrinsicInfo(0x0f008400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64ShsubV, new IntrinsicInfo(0x0e202400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SliS, new IntrinsicInfo(0x7f005400u, IntrinsicType.ScalarTernaryShlRd));
+ Add(Intrinsic.Arm64SliV, new IntrinsicInfo(0x2f005400u, IntrinsicType.VectorTernaryShlRd));
+ Add(Intrinsic.Arm64SmaxpV, new IntrinsicInfo(0x0e20a400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SmaxvV, new IntrinsicInfo(0x0e30a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SmaxV, new IntrinsicInfo(0x0e206400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SminpV, new IntrinsicInfo(0x0e20ac00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SminvV, new IntrinsicInfo(0x0e31a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SminV, new IntrinsicInfo(0x0e206c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SmlalVe, new IntrinsicInfo(0x0f002000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64SmlalV, new IntrinsicInfo(0x0e208000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SmlslVe, new IntrinsicInfo(0x0f006000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64SmlslV, new IntrinsicInfo(0x0e20a000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SmovV, new IntrinsicInfo(0x0e002c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64SmullVe, new IntrinsicInfo(0x0f00a000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SmullV, new IntrinsicInfo(0x0e20c000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqabsS, new IntrinsicInfo(0x5e207800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64SqabsV, new IntrinsicInfo(0x0e207800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SqaddS, new IntrinsicInfo(0x5e200c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqaddV, new IntrinsicInfo(0x0e200c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmlalSe, new IntrinsicInfo(0x5f003000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlalVe, new IntrinsicInfo(0x0f003000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlalS, new IntrinsicInfo(0x5e209000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmlalV, new IntrinsicInfo(0x0e209000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmlslSe, new IntrinsicInfo(0x5f007000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlslVe, new IntrinsicInfo(0x0f007000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlslS, new IntrinsicInfo(0x5e20b000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmlslV, new IntrinsicInfo(0x0e20b000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmulhSe, new IntrinsicInfo(0x5f00c000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmulhVe, new IntrinsicInfo(0x0f00c000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmulhS, new IntrinsicInfo(0x5e20b400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmulhV, new IntrinsicInfo(0x0e20b400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmullSe, new IntrinsicInfo(0x5f00b000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmullVe, new IntrinsicInfo(0x0f00b000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmullS, new IntrinsicInfo(0x5e20d000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmullV, new IntrinsicInfo(0x0e20d000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqnegS, new IntrinsicInfo(0x7e207800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64SqnegV, new IntrinsicInfo(0x2e207800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SqrdmulhSe, new IntrinsicInfo(0x5f00d000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqrdmulhVe, new IntrinsicInfo(0x0f00d000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqrdmulhS, new IntrinsicInfo(0x7e20b400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqrdmulhV, new IntrinsicInfo(0x2e20b400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqrshlS, new IntrinsicInfo(0x5e205c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqrshlV, new IntrinsicInfo(0x0e205c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqrshrnS, new IntrinsicInfo(0x5f009c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrnV, new IntrinsicInfo(0x0f009c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrunS, new IntrinsicInfo(0x7f008c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrunV, new IntrinsicInfo(0x2f008c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqshluS, new IntrinsicInfo(0x7f006400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64SqshluV, new IntrinsicInfo(0x2f006400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SqshlSi, new IntrinsicInfo(0x5f007400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64SqshlVi, new IntrinsicInfo(0x0f007400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SqshlS, new IntrinsicInfo(0x5e204c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqshlV, new IntrinsicInfo(0x0e204c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqshrnS, new IntrinsicInfo(0x5f009400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrnV, new IntrinsicInfo(0x0f009400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrunS, new IntrinsicInfo(0x7f008400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrunV, new IntrinsicInfo(0x2f008400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqsubS, new IntrinsicInfo(0x5e202c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqsubV, new IntrinsicInfo(0x0e202c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqxtnS, new IntrinsicInfo(0x5e214800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SqxtnV, new IntrinsicInfo(0x0e214800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SqxtunS, new IntrinsicInfo(0x7e212800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SqxtunV, new IntrinsicInfo(0x2e212800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SrhaddV, new IntrinsicInfo(0x0e201400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SriS, new IntrinsicInfo(0x7f004400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SriV, new IntrinsicInfo(0x2f004400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SrshlS, new IntrinsicInfo(0x5e205400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SrshlV, new IntrinsicInfo(0x0e205400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SrshrS, new IntrinsicInfo(0x5f002400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64SrshrV, new IntrinsicInfo(0x0f002400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64SrsraS, new IntrinsicInfo(0x5f003400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SrsraV, new IntrinsicInfo(0x0f003400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SshllV, new IntrinsicInfo(0x0f00a400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SshlS, new IntrinsicInfo(0x5e204400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SshlV, new IntrinsicInfo(0x0e204400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SshrS, new IntrinsicInfo(0x5f000400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64SshrV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64SsraS, new IntrinsicInfo(0x5f001400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SsraV, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SsublV, new IntrinsicInfo(0x0e202000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SsubwV, new IntrinsicInfo(0x0e203000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64St1Vms, new IntrinsicInfo(0x0c002000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St1Vss, new IntrinsicInfo(0x0d000000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St2Vms, new IntrinsicInfo(0x0c008000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St2Vss, new IntrinsicInfo(0x0d200000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St3Vms, new IntrinsicInfo(0x0c004000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St3Vss, new IntrinsicInfo(0x0d002000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St4Vms, new IntrinsicInfo(0x0c000000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St4Vss, new IntrinsicInfo(0x0d202000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64SubhnV, new IntrinsicInfo(0x0e206000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SubS, new IntrinsicInfo(0x7e208400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SubV, new IntrinsicInfo(0x2e208400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SuqaddS, new IntrinsicInfo(0x5e203800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SuqaddV, new IntrinsicInfo(0x0e203800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64TblV, new IntrinsicInfo(0x0e000000u, IntrinsicType.VectorLookupTable));
+ Add(Intrinsic.Arm64TbxV, new IntrinsicInfo(0x0e001000u, IntrinsicType.VectorLookupTable));
+ Add(Intrinsic.Arm64Trn1V, new IntrinsicInfo(0x0e002800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Trn2V, new IntrinsicInfo(0x0e006800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UabalV, new IntrinsicInfo(0x2e205000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UabaV, new IntrinsicInfo(0x2e207c00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UabdlV, new IntrinsicInfo(0x2e207000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UabdV, new IntrinsicInfo(0x2e207400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UadalpV, new IntrinsicInfo(0x2e206800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UaddlpV, new IntrinsicInfo(0x2e202800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UaddlvV, new IntrinsicInfo(0x2e303800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UaddlV, new IntrinsicInfo(0x2e200000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UaddwV, new IntrinsicInfo(0x2e201000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UcvtfSFixed, new IntrinsicInfo(0x7f00e400u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64UcvtfVFixed, new IntrinsicInfo(0x2f00e400u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64UcvtfS, new IntrinsicInfo(0x7e21d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64UcvtfV, new IntrinsicInfo(0x2e21d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UcvtfGpFixed, new IntrinsicInfo(0x1e030000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64UcvtfGp, new IntrinsicInfo(0x1e230000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64UhaddV, new IntrinsicInfo(0x2e200400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UhsubV, new IntrinsicInfo(0x2e202400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmaxpV, new IntrinsicInfo(0x2e20a400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmaxvV, new IntrinsicInfo(0x2e30a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UmaxV, new IntrinsicInfo(0x2e206400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UminpV, new IntrinsicInfo(0x2e20ac00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UminvV, new IntrinsicInfo(0x2e31a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UminV, new IntrinsicInfo(0x2e206c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmlalVe, new IntrinsicInfo(0x2f002000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64UmlalV, new IntrinsicInfo(0x2e208000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UmlslVe, new IntrinsicInfo(0x2f006000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64UmlslV, new IntrinsicInfo(0x2e20a000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UmovV, new IntrinsicInfo(0x0e003c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64UmullVe, new IntrinsicInfo(0x2f00a000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64UmullV, new IntrinsicInfo(0x2e20c000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqaddS, new IntrinsicInfo(0x7e200c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqaddV, new IntrinsicInfo(0x2e200c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqrshlS, new IntrinsicInfo(0x7e205c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqrshlV, new IntrinsicInfo(0x2e205c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqrshrnS, new IntrinsicInfo(0x7f009c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UqrshrnV, new IntrinsicInfo(0x2f009c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UqshlSi, new IntrinsicInfo(0x7f007400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64UqshlVi, new IntrinsicInfo(0x2f007400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64UqshlS, new IntrinsicInfo(0x7e204c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqshlV, new IntrinsicInfo(0x2e204c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqshrnS, new IntrinsicInfo(0x7f009400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UqshrnV, new IntrinsicInfo(0x2f009400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UqsubS, new IntrinsicInfo(0x7e202c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqsubV, new IntrinsicInfo(0x2e202c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqxtnS, new IntrinsicInfo(0x7e214800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64UqxtnV, new IntrinsicInfo(0x2e214800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UrecpeV, new IntrinsicInfo(0x0ea1c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UrhaddV, new IntrinsicInfo(0x2e201400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UrshlS, new IntrinsicInfo(0x7e205400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UrshlV, new IntrinsicInfo(0x2e205400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UrshrS, new IntrinsicInfo(0x7f002400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64UrshrV, new IntrinsicInfo(0x2f002400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64UrsqrteV, new IntrinsicInfo(0x2ea1c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UrsraS, new IntrinsicInfo(0x7f003400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UrsraV, new IntrinsicInfo(0x2f003400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UshllV, new IntrinsicInfo(0x2f00a400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64UshlS, new IntrinsicInfo(0x7e204400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UshlV, new IntrinsicInfo(0x2e204400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UshrS, new IntrinsicInfo(0x7f000400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64UshrV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64UsqaddS, new IntrinsicInfo(0x7e203800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64UsqaddV, new IntrinsicInfo(0x2e203800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UsraS, new IntrinsicInfo(0x7f001400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UsraV, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UsublV, new IntrinsicInfo(0x2e202000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UsubwV, new IntrinsicInfo(0x2e203000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Uzp1V, new IntrinsicInfo(0x0e001800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Uzp2V, new IntrinsicInfo(0x0e005800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64XtnV, new IntrinsicInfo(0x0e212800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Zip1V, new IntrinsicInfo(0x0e003800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Zip2V, new IntrinsicInfo(0x0e007800u, IntrinsicType.VectorBinary));
+#pragma warning restore IDE0055
+ }
+
+ private static void Add(Intrinsic intrin, IntrinsicInfo info)
+ {
+ _intrinTable[(int)intrin] = info;
+ }
+
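+ // Illustrative sketch (the "Inst" opcode field name on IntrinsicInfo is
+ // assumed here): the base opcode returned by GetInfo is OR'ed with the
+ // register fields at emit time. For a three-register SIMD instruction,
+ // Rd occupies bits [4:0], Rn bits [9:5] and Rm bits [20:16]:
+ //
+ //   IntrinsicInfo info = IntrinsicTable.GetInfo(Intrinsic.Arm64SubV);
+ //   uint inst = info.Inst | rd | (rn << 5) | (rm << 16);
+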
+ public static IntrinsicInfo GetInfo(Intrinsic intrin)
+ {
+ return _intrinTable[(int)intrin];
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs
new file mode 100644
index 0000000..7538575
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs
@@ -0,0 +1,60 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+ enum IntrinsicType
+ {
+ ScalarUnary,
+ ScalarUnaryByElem,
+ ScalarBinary,
+ ScalarBinaryByElem,
+ ScalarBinaryFPByElem,
+ ScalarBinaryRd,
+ ScalarBinaryShl,
+ ScalarBinaryShr,
+ ScalarFcsel,
+ ScalarFmovi,
+ ScalarFPCompare,
+ ScalarFPCompareCond,
+ ScalarFPConv,
+ ScalarFPConvFixed,
+ ScalarFPConvFixedGpr,
+ ScalarFPConvGpr,
+ ScalarTernary,
+ ScalarTernaryFPRdByElem,
+ ScalarTernaryShlRd,
+ ScalarTernaryShrRd,
+
+ Vector128Unary,
+ Vector128Binary,
+ Vector128BinaryRd,
+
+ VectorUnary,
+ VectorUnaryBitwise,
+ VectorUnaryByElem,
+ VectorBinary,
+ VectorBinaryBitwise,
+ VectorBinaryBitwiseImm,
+ VectorBinaryByElem,
+ VectorBinaryFPByElem,
+ VectorBinaryRd,
+ VectorBinaryShl,
+ VectorBinaryShr,
+ VectorExt,
+ VectorFmovi,
+ VectorFPConvFixed,
+ VectorInsertByElem,
+ VectorLdSt,
+ VectorLdStSs,
+ VectorLookupTable,
+ VectorMovi,
+ VectorMvni,
+ VectorTernaryFPRdByElem,
+ VectorTernaryRd,
+ VectorTernaryRdBitwise,
+ VectorTernaryRdByElem,
+ VectorTernaryShlRd,
+ VectorTernaryShrRd,
+
+ GetRegister,
+ SetRegister,
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs b/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
new file mode 100644
index 0000000..f66bb66
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
@@ -0,0 +1,887 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class PreAllocator
+ {
+ private class ConstantDict
+ {
+ private readonly Dictionary<(ulong, OperandType), Operand> _constants;
+
+ public ConstantDict()
+ {
+ _constants = new Dictionary<(ulong, OperandType), Operand>();
+ }
+
+ public void Add(ulong value, OperandType type, Operand local)
+ {
+ _constants.Add((value, type), local);
+ }
+
+ public bool TryGetValue(ulong value, OperandType type, out Operand local)
+ {
+ return _constants.TryGetValue((value, type), out local);
+ }
+ }
+
+ public static void RunPass(CompilerContext cctx, out int maxCallArgs)
+ {
+ maxCallArgs = -1;
+
+ Span<Operation> buffer = default;
+
+ Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+ for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ ConstantDict constants = new();
+
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ if (node.Instruction == Instruction.Phi)
+ {
+ continue;
+ }
+
+ InsertConstantRegCopies(constants, block.Operations, node);
+ InsertDestructiveRegCopies(block.Operations, node);
+
+ switch (node.Instruction)
+ {
+ case Instruction.Call:
+ // Get the maximum number of arguments used on a call.
+ // On Windows, when a struct is returned from the call,
+ // we also need to pass the pointer where the struct
+ // should be written as the first argument.
+ int argsCount = node.SourcesCount - 1;
+
+ if (node.Destination != default && node.Destination.Type == OperandType.V128)
+ {
+ argsCount++;
+ }
+
+ if (maxCallArgs < argsCount)
+ {
+ maxCallArgs = argsCount;
+ }
+
+ // Copy values to registers expected by the function
+ // being called, as mandated by the ABI.
+ InsertCallCopies(constants, block.Operations, node);
+ break;
+ case Instruction.CompareAndSwap:
+ case Instruction.CompareAndSwap16:
+ case Instruction.CompareAndSwap8:
+ nextNode = GenerateCompareAndSwap(block.Operations, node);
+ break;
+ case Instruction.LoadArgument:
+ nextNode = InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node);
+ break;
+ case Instruction.Return:
+ InsertReturnCopy(block.Operations, node);
+ break;
+ case Instruction.Tailcall:
+ InsertTailcallCopies(constants, block.Operations, node, node);
+ break;
+ }
+ }
+ }
+ }
+
+ private static void InsertConstantRegCopies(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0 || IsIntrinsicWithConst(node))
+ {
+ return;
+ }
+
+ Instruction inst = node.Instruction;
+
+ Operand src1 = node.GetSource(0);
+ Operand src2;
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ if (!src1.Type.IsInteger())
+ {
+ // Handle non-integer types (FP32, FP64 and V128).
+ // For instructions without an immediate operand, we do the following:
+ // - Insert a copy with the constant value (as integer) to a GPR.
+ // - Insert a copy from the GPR to an XMM register.
+ // - Replace the constant use with the XMM register.
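+ // For example, materializing the FP32 constant 1.0f (bits 0x3f800000)
+ // inserts:
+ //   Copy               t0 <- const 0x3f800000  (into a GPR)
+ //   VectorCreateScalar v0 <- t0                (into a vector register)
+ // and the constant use is then replaced with v0.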
+ src1 = AddFloatConstantCopy(constants, nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ else if (!HasConstSrc1(node, src1.Value))
+ {
+ // Handle integer types.
+ // Most ALU instructions accept a 32-bit immediate on the second operand.
+ // We need to ensure the following:
+ // - If the constant is on operand 1, we need to move it.
+ // -- But first, we try to swap operand 1 and 2 if the instruction is commutative.
+ // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+ // - If the constant is on operand 2, we check if the instruction supports it,
+ // if not, we also add a copy. 64-bit constants are usually not supported.
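+ // For example, "Add d, const, x1" is swapped to "Add d, x1, const" so
+ // the constant may use the immediate encoding instead of an extra copy.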
+ if (IsCommutative(node))
+ {
+ src2 = node.GetSource(1);
+
+ (src2, src1) = (src1, src2);
+
+ node.SetSource(0, src1);
+ node.SetSource(1, src2);
+ }
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ src1 = AddIntConstantCopy(constants, nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ }
+ }
+
+ if (node.SourcesCount < 2)
+ {
+ return;
+ }
+
+ src2 = node.GetSource(1);
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ if (!src2.Type.IsInteger())
+ {
+ src2 = AddFloatConstantCopy(constants, nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ else if (!HasConstSrc2(inst, src2))
+ {
+ src2 = AddIntConstantCopy(constants, nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ }
+
+ if (node.SourcesCount < 3 ||
+ node.Instruction == Instruction.BranchIf ||
+ node.Instruction == Instruction.Compare ||
+ node.Instruction == Instruction.VectorInsert ||
+ node.Instruction == Instruction.VectorInsert16 ||
+ node.Instruction == Instruction.VectorInsert8)
+ {
+ return;
+ }
+
+ for (int srcIndex = 2; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ Operand src = node.GetSource(srcIndex);
+
+ if (src.Kind == OperandKind.Constant)
+ {
+ if (!src.Type.IsInteger())
+ {
+ src = AddFloatConstantCopy(constants, nodes, node, src);
+
+ node.SetSource(srcIndex, src);
+ }
+ else
+ {
+ src = AddIntConstantCopy(constants, nodes, node, src);
+
+ node.SetSource(srcIndex, src);
+ }
+ }
+ }
+ }
+
+ private static void InsertDestructiveRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.Destination == default || node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand dest = node.Destination;
+ Operand src1 = node.GetSource(0);
+
+ if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable)
+ {
+ bool useNewLocal = false;
+
+ for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ if (node.GetSource(srcIndex) == dest)
+ {
+ useNewLocal = true;
+
+ break;
+ }
+ }
+
+ if (useNewLocal)
+ {
+ // Dest is already being used as a source, so we need a new
+ // local to store the temporary value; otherwise the value in the
+ // dest local would be overwritten.
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));
+
+ node.SetSource(0, temp);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
+
+ node.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1));
+
+ node.SetSource(0, dest);
+ }
+ }
+ }
+
+ private static void InsertCallCopies(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operation operation = node;
+
+ Operand dest = operation.Destination;
+
+ List<Operand> sources = new()
+ {
+ operation.GetSource(0),
+ };
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ int stackOffset = 0;
+
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(index + 1);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < intMax;
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
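+ // The low 64 bits go in the first of two consecutive GPRs, matching
+ // the element order of the extracts below.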
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ Operand offset = Const(stackOffset);
+
+ Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+
+ InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, spillOp));
+
+ stackOffset += source.Type.GetSizeInBytes();
+ }
+ }
+
+ if (dest != default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
+ nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+
+ operation.Destination = default;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+
+ nodes.AddAfter(node, copyOp);
+
+ operation.Destination = retReg;
+ }
+ }
+
+ operation.SetSources(sources.ToArray());
+ }
+
+ private static void InsertTailcallCopies(ConstantDict constants,
+ IntrusiveList<Operation> nodes,
+ Operation node,
+ Operation operation)
+ {
+ List<Operand> sources = new()
+ {
+ operation.GetSource(0),
+ };
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(1 + index);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+ }
+ }
+
+ // The target address must be in a return register, since we
+ // don't return anything and it is guaranteed not to be a
+ // callee-saved register (which would be trashed in the epilogue).
+ Operand tcAddress = Gpr(CodeGenCommon.TcAddressRegister, OperandType.I64);
+
+ Operation addrCopyOp = Operation(Instruction.Copy, tcAddress, operation.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = tcAddress;
+
+ operation.SetSources(sources.ToArray());
+ }
+
+ private static Operation GenerateCompareAndSwap(IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operand expected = node.GetSource(1);
+
+ if (expected.Type == OperandType.V128)
+ {
+ Operand dest = node.Destination;
+ Operand expectedLow = Local(OperandType.I64);
+ Operand expectedHigh = Local(OperandType.I64);
+ Operand desiredLow = Local(OperandType.I64);
+ Operand desiredHigh = Local(OperandType.I64);
+ Operand actualLow = Local(OperandType.I64);
+ Operand actualHigh = Local(OperandType.I64);
+
+ Operand address = node.GetSource(0);
+ Operand desired = node.GetSource(2);
+
+ void SplitOperand(Operand source, Operand low, Operand high)
+ {
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, low, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, high, source, Const(1)));
+ }
+
+ SplitOperand(expected, expectedLow, expectedHigh);
+ SplitOperand(desired, desiredLow, desiredHigh);
+
+ Operation operation = node;
+
+ // Update the sources and destinations with the split 64-bit halves of the whole 128-bit values.
+ // We also need additional registers that will be used to store temporary information.
+ operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) });
+ operation.SetSources(new[] { address, expectedLow, expectedHigh, desiredLow, desiredHigh });
+
+ // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+ // so they can't be used as destination operand.
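+ // (Each "Copy src <- src" below keeps the input live across the loop
+ // without changing its value.)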
+ for (int i = 0; i < operation.SourcesCount; i++)
+ {
+ Operand src = operation.GetSource(i);
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+ }
+
+ // Assemble the vector with the 64-bit values at the given memory location.
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, actualLow));
+ node = nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, actualHigh, Const(1)));
+ }
+ else
+ {
+ // We need an additional register where the store result will be written.
+ node.SetDestinations(new[] { node.Destination, Local(OperandType.I32) });
+
+ // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+ // so they can't be used as destination operand.
+ Operation operation = node;
+
+ for (int i = 0; i < operation.SourcesCount; i++)
+ {
+ Operand src = operation.GetSource(i);
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+ }
+ }
+
+ return node.ListNext;
+ }
+
+ private static void InsertReturnCopy(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand source = node.GetSource(0);
+
+ if (source.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
+ }
+ else
+ {
+ Operand retReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
+
+ Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
+
+ nodes.AddBefore(node, retCopyOp);
+ }
+ }
+
+ private static Operation InsertLoadArgumentCopy(
+ CompilerContext cctx,
+ ref Span<Operation> buffer,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ int index = source.AsInt32();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ for (int cIndex = 0; cIndex < index; cIndex++)
+ {
+ OperandType argType = cctx.FuncArgTypes[cIndex];
+
+ if (argType.IsInteger())
+ {
+ intCount++;
+ }
+ else if (argType == OperandType.V128)
+ {
+ intCount += 2;
+ }
+ else
+ {
+ vecCount++;
+ }
+ }
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < CallingConvention.GetArgumentsOnRegsCount();
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < CallingConvention.GetArgumentsOnRegsCount();
+ }
+ else
+ {
+ passOnReg = vecCount < CallingConvention.GetArgumentsOnRegsCount();
+ }
+
+ if (passOnReg)
+ {
+ Operand dest = node.Destination;
+
+ if (preservedArgs[index] == default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand pArg = Local(OperandType.V128);
+
+ Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
+ Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
+
+ Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg);
+ Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyH);
+ cctx.Cfg.Entry.Operations.AddFirst(copyL);
+
+ preservedArgs[index] = pArg;
+ }
+ else
+ {
+ Operand pArg = Local(dest.Type);
+
+ Operand argReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+ }
+
+ Operation nextNode;
+
+ if (dest.AssignmentsCount == 1)
+ {
+ // Let's propagate the argument if we can to avoid copies.
+ PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]);
+ nextNode = node.ListNext;
+ }
+ else
+ {
+ Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]);
+ nextNode = nodes.AddBefore(node, argCopyOp);
+ }
+
+ Delete(nodes, node);
+ return nextNode;
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ return node;
+ }
+ }
+
+ private static Operand AddFloatConstantCopy(
+ ConstantDict constants,
+ IntrusiveList<Operation> nodes,
+ Operation node,
+ Operand source)
+ {
+ Operand temp = Local(source.Type);
+
+ Operand intConst = AddIntConstantCopy(constants, nodes, node, GetIntConst(source));
+
+ Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst);
+
+ nodes.AddBefore(node, copyOp);
+
+ return temp;
+ }
+
+ private static Operand AddIntConstantCopy(
+ ConstantDict constants,
+ IntrusiveList<Operation> nodes,
+ Operation node,
+ Operand source)
+ {
+ if (constants.TryGetValue(source.Value, source.Type, out Operand temp))
+ {
+ return temp;
+ }
+
+ temp = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, temp, source);
+
+ nodes.AddBefore(node, copyOp);
+
+ constants.Add(source.Value, source.Type, temp);
+
+ return temp;
+ }
+
+ private static Operand GetIntConst(Operand value)
+ {
+ if (value.Type == OperandType.FP32)
+ {
+ return Const(value.AsInt32());
+ }
+ else if (value.Type == OperandType.FP64)
+ {
+ return Const(value.AsInt64());
+ }
+
+ return value;
+ }
+
+ private static void Delete(IntrusiveList<Operation> nodes, Operation node)
+ {
+ node.Destination = default;
+
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ node.SetSource(index, default);
+ }
+
+ nodes.Remove(node);
+ }
+
+ private static Operand Gpr(int register, OperandType type)
+ {
+ return Register(register, RegisterType.Integer, type);
+ }
+
+ private static Operand Xmm(int register, OperandType type)
+ {
+ return Register(register, RegisterType.Vector, type);
+ }
+
+ private static bool IsSameOperandDestSrc1(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Extended:
+ return IsSameOperandDestSrc1(operation.Intrinsic);
+ case Instruction.VectorInsert:
+ case Instruction.VectorInsert16:
+ case Instruction.VectorInsert8:
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool IsSameOperandDestSrc1(Intrinsic intrinsic)
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
+ return info.Type == IntrinsicType.ScalarBinaryRd ||
+ info.Type == IntrinsicType.ScalarTernaryFPRdByElem ||
+ info.Type == IntrinsicType.ScalarTernaryShlRd ||
+ info.Type == IntrinsicType.ScalarTernaryShrRd ||
+ info.Type == IntrinsicType.Vector128BinaryRd ||
+ info.Type == IntrinsicType.VectorBinaryRd ||
+ info.Type == IntrinsicType.VectorInsertByElem ||
+ info.Type == IntrinsicType.VectorTernaryRd ||
+ info.Type == IntrinsicType.VectorTernaryRdBitwise ||
+ info.Type == IntrinsicType.VectorTernaryFPRdByElem ||
+ info.Type == IntrinsicType.VectorTernaryRdByElem ||
+ info.Type == IntrinsicType.VectorTernaryShlRd ||
+ info.Type == IntrinsicType.VectorTernaryShrRd;
+ }
+
+ private static bool HasConstSrc1(Operation node, ulong value)
+ {
+ switch (node.Instruction)
+ {
+ case Instruction.Add:
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ case Instruction.Subtract:
+ // The immediate encoding of those instructions does not allow Rn to be
+ // XZR (it will be SP instead), so we can't allow an Rn constant in this case.
+ return value == 0 && NotConstOrConst0(node.GetSource(1));
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseNot:
+ case Instruction.BitwiseOr:
+ case Instruction.ByteSwap:
+ case Instruction.CountLeadingZeros:
+ case Instruction.Multiply:
+ case Instruction.Negate:
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ return value == 0;
+ case Instruction.Copy:
+ case Instruction.LoadArgument:
+ case Instruction.Spill:
+ case Instruction.SpillArg:
+ return true;
+ case Instruction.Extended:
+ return value == 0;
+ }
+
+ return false;
+ }
+
+ private static bool NotConstOrConst0(Operand operand)
+ {
+ return operand.Kind != OperandKind.Constant || operand.Value == 0;
+ }
+
+ private static bool HasConstSrc2(Instruction inst, Operand operand)
+ {
+ ulong value = operand.Value;
+
+ switch (inst)
+ {
+ case Instruction.Add:
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ case Instruction.Subtract:
+ return ConstFitsOnUImm12Sh(value);
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ return value == 0 || CodeGenCommon.TryEncodeBitMask(operand, out _, out _, out _);
+ case Instruction.Multiply:
+ case Instruction.Store:
+ case Instruction.Store16:
+ case Instruction.Store8:
+ return value == 0;
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ case Instruction.VectorExtract:
+ case Instruction.VectorExtract16:
+ case Instruction.VectorExtract8:
+ return true;
+ case Instruction.Extended:
+ // TODO: Check if actual intrinsic is supposed to have consts here?
+ // Right now we only hit this case for fixed-point int <-> FP conversion instructions.
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool IsCommutative(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ case Instruction.Multiply:
+ return true;
+
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var compType = (Comparison)comp.AsInt32();
+
+ return compType == Comparison.Equal || compType == Comparison.NotEqual;
+ }
+ }
+
+ return false;
+ }
+
+ private static bool ConstFitsOnUImm12Sh(ulong value)
+ {
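+ // ADD/SUB (immediate) encodes a 12-bit unsigned immediate, optionally
+ // shifted left by 12 bits: e.g. 0xabc and 0xabc000 fit, 0xabc001 does not.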
+ return (value & ~0xfffUL) == 0 || (value & ~0xfff000UL) == 0;
+ }
+
+ private static bool IsIntrinsicWithConst(Operation operation)
+ {
+ bool isIntrinsic = IsIntrinsic(operation.Instruction);
+
+ if (isIntrinsic)
+ {
+ Intrinsic intrinsic = operation.Intrinsic;
+ IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
+ // Those have integer inputs that don't support consts.
+ return info.Type != IntrinsicType.ScalarFPConvGpr &&
+ info.Type != IntrinsicType.ScalarFPConvFixedGpr &&
+ info.Type != IntrinsicType.SetRegister;
+ }
+
+ return false;
+ }
+
+ private static bool IsIntrinsic(Instruction inst)
+ {
+ return inst == Instruction.Extended;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/CompiledFunction.cs b/src/ARMeilleure/CodeGen/CompiledFunction.cs
new file mode 100644
index 0000000..3844cbf
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/CompiledFunction.cs
@@ -0,0 +1,68 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Translation.Cache;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.CodeGen
+{
+ /// <summary>
+ /// Represents a compiled function.
+ /// </summary>
+ readonly struct CompiledFunction
+ {
+ /// <summary>
+ /// Gets the machine code of the <see cref="CompiledFunction"/>.
+ /// </summary>
+ public byte[] Code { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Unwinding.UnwindInfo"/> of the <see cref="CompiledFunction"/>.
+ /// </summary>
+ public UnwindInfo UnwindInfo { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Linking.RelocInfo"/> of the <see cref="CompiledFunction"/>.
+ /// </summary>
+ public RelocInfo RelocInfo { get; }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="CompiledFunction"/> struct with the specified machine code,
+ /// unwind info and relocation info.
+ /// </summary>
+ /// <param name="code">Machine code</param>
+ /// <param name="unwindInfo">Unwind info</param>
+ /// <param name="relocInfo">Relocation info</param>
+ internal CompiledFunction(byte[] code, UnwindInfo unwindInfo, RelocInfo relocInfo)
+ {
+ Code = code;
+ UnwindInfo = unwindInfo;
+ RelocInfo = relocInfo;
+ }
+
+ /// <summary>
+ /// Maps the <see cref="CompiledFunction"/> onto the <see cref="JitCache"/> and returns a delegate of type
+ /// <typeparamref name="T"/> pointing to the mapped function.
+ /// </summary>
+ /// <typeparam name="T">Type of delegate</typeparam>
+ /// <returns>A delegate of type <typeparamref name="T"/> pointing to the mapped function</returns>
+ public T Map<T>()
+ {
+ return MapWithPointer<T>(out _);
+ }
+
+ /// <summary>
+ /// Maps the <see cref="CompiledFunction"/> onto the <see cref="JitCache"/> and returns a delegate of type
+ /// <typeparamref name="T"/> pointing to the mapped function.
+ /// </summary>
+ /// <typeparam name="T">Type of delegate</typeparam>
+ /// <param name="codePointer">Pointer to the function code in memory</param>
+ /// <returns>A delegate of type <typeparamref name="T"/> pointing to the mapped function</returns>
+ public T MapWithPointer<T>(out IntPtr codePointer)
+ {
+ codePointer = JitCache.Map(this);
+
+ return Marshal.GetDelegateForFunctionPointer<T>(codePointer);
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs b/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs
new file mode 100644
index 0000000..d103bc3
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs
@@ -0,0 +1,38 @@
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Represents a relocation.
+ /// </summary>
+ readonly struct RelocEntry
+ {
+ public const int Stride = 13; // Bytes.
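+ // 13 bytes presumably covers a 4-byte position, a 1-byte symbol type and
+ // an 8-byte symbol value (an assumption; serialization is handled elsewhere).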
+
+ /// <summary>
+ /// Gets the position of the relocation.
+ /// </summary>
+ public int Position { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Linking.Symbol"/> of the relocation.
+ /// </summary>
+ public Symbol Symbol { get; }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="RelocEntry"/> struct with the specified position and
+ /// <see cref="Linking.Symbol"/>.
+ /// </summary>
+ /// <param name="position">Position of relocation</param>
+ /// <param name="symbol">Symbol of relocation</param>
+ public RelocEntry(int position, Symbol symbol)
+ {
+ Position = position;
+ Symbol = symbol;
+ }
+
+ /// <inheritdoc/>
+ public override string ToString()
+ {
+ return $"({nameof(Position)} = {Position}, {nameof(Symbol)} = {Symbol})";
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs b/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs
new file mode 100644
index 0000000..01ff034
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs
@@ -0,0 +1,32 @@
+using System;
+
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Represents relocation information about a <see cref="CompiledFunction"/>.
+ /// </summary>
+ readonly struct RelocInfo
+ {
+ /// <summary>
+ /// Gets an empty <see cref="RelocInfo"/>.
+ /// </summary>
+ public static RelocInfo Empty { get; } = new RelocInfo(null);
+
+ private readonly RelocEntry[] _entries;
+
+ /// <summary>
+ /// Gets the set of <see cref="RelocEntry"/>.
+ /// </summary>
+ public ReadOnlySpan<RelocEntry> Entries => _entries;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="RelocInfo"/> struct with the specified set of
+ /// <see cref="RelocEntry"/>.
+ /// </summary>
+ /// <param name="entries">Set of <see cref="RelocEntry"/> to use</param>
+ public RelocInfo(RelocEntry[] entries)
+ {
+ _entries = entries;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Linking/Symbol.cs b/src/ARMeilleure/CodeGen/Linking/Symbol.cs
new file mode 100644
index 0000000..5559afe
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/Symbol.cs
@@ -0,0 +1,99 @@
+using System;
+
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Represents a symbol.
+ /// </summary>
+ readonly struct Symbol
+ {
+ private readonly ulong _value;
+
+ /// <summary>
+ /// Gets the <see cref="SymbolType"/> of the <see cref="Symbol"/>.
+ /// </summary>
+ public SymbolType Type { get; }
+
+ /// <summary>
+ /// Gets the value of the <see cref="Symbol"/>.
+ /// </summary>
+ /// <exception cref="InvalidOperationException"><see cref="Type"/> is <see cref="SymbolType.None"/></exception>
+ public ulong Value
+ {
+ get
+ {
+ if (Type == SymbolType.None)
+ {
+ ThrowSymbolNone();
+ }
+
+ return _value;
+ }
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Symbol"/> structure with the specified <see cref="SymbolType"/> and value.
+ /// </summary>
+ /// <param name="type">Type of symbol</param>
+ /// <param name="value">Value of symbol</param>
+ public Symbol(SymbolType type, ulong value)
+ {
+ (Type, _value) = (type, value);
+ }
+
+ /// <summary>
+ /// Determines if the specified <see cref="Symbol"/> instances are equal.
+ /// </summary>
+ /// <param name="a">First instance</param>
+ /// <param name="b">Second instance</param>
+ /// <returns><see langword="true"/> if equal; otherwise <see langword="false"/></returns>
+ public static bool operator ==(Symbol a, Symbol b)
+ {
+ return a.Equals(b);
+ }
+
+ /// <summary>
+ /// Determines if the specified <see cref="Symbol"/> instances are not equal.
+ /// </summary>
+ /// <param name="a">First instance</param>
+ /// <param name="b">Second instance</param>
+ /// <returns><see langword="true"/> if not equal; otherwise <see langword="false"/></returns>
+ public static bool operator !=(Symbol a, Symbol b)
+ {
+ return !(a == b);
+ }
+
+ /// <summary>
+ /// Determines if the specified <see cref="Symbol"/> is equal to this instance.
+ /// </summary>
+ /// <param name="other">Other instance</param>
+ /// <returns><see langword="true"/> if equal; otherwise <see langword="false"/></returns>
+ public bool Equals(Symbol other)
+ {
+ return other.Type == Type && other._value == _value;
+ }
+
+ /// <inheritdoc/>
+ public override bool Equals(object obj)
+ {
+ return obj is Symbol sym && Equals(sym);
+ }
+
+ /// <inheritdoc/>
+ public override int GetHashCode()
+ {
+ return HashCode.Combine(Type, _value);
+ }
+
+ /// <inheritdoc/>
+ public override string ToString()
+ {
+ return $"{Type}:{_value}";
+ }
+
+ private static void ThrowSymbolNone()
+ {
+ throw new InvalidOperationException("Symbol refers to nothing.");
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Linking/SymbolType.cs b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs
new file mode 100644
index 0000000..29011a7
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Types of <see cref="Symbol"/>.
+ /// </summary>
+ enum SymbolType : byte
+ {
+ /// <summary>
+ /// Refers to nothing, i.e. no symbol.
+ /// </summary>
+ None,
+
+ /// <summary>
+ /// Refers to an entry in the delegate table.
+ /// </summary>
+ DelegateTable,
+
+ /// <summary>
+ /// Refers to an entry in the function table.
+ /// </summary>
+ FunctionTable,
+
+ /// <summary>
+ /// Refers to a special symbol which is handled by the translator.
+ /// </summary>
+ Special,
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs b/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs
new file mode 100644
index 0000000..5f0e377
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs
@@ -0,0 +1,72 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class BlockPlacement
+ {
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+ bool update = false;
+
+ BasicBlock block;
+ BasicBlock nextBlock;
+
+ BasicBlock lastBlock = cfg.Blocks.Last;
+
+ // Move cold blocks to the end of the list, so that they are emitted away from hot code.
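+ // The loop stops at the original last block, so blocks re-appended at
+ // the end are not visited twice.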
+ for (block = cfg.Blocks.First; block != null; block = nextBlock)
+ {
+ nextBlock = block.ListNext;
+
+ if (block.Frequency == BasicBlockFrequency.Cold)
+ {
+ cfg.Blocks.Remove(block);
+ cfg.Blocks.AddLast(block);
+ }
+
+ if (block == lastBlock)
+ {
+ break;
+ }
+ }
+
+ for (block = cfg.Blocks.First; block != null; block = nextBlock)
+ {
+ nextBlock = block.ListNext;
+
+ if (block.SuccessorsCount == 2)
+ {
+ Operation branchOp = block.Operations.Last;
+
+ Debug.Assert(branchOp.Instruction == Instruction.BranchIf);
+
+ BasicBlock falseSucc = block.GetSuccessor(0);
+ BasicBlock trueSucc = block.GetSuccessor(1);
+
+ // If the true successor is the next block in the list, invert the condition. We avoid extra branching by
+ // making the true side the fallthrough (i.e., converting it to the false side).
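+ // For example, "if (c) goto B1 else goto B2" immediately followed by B1
+ // becomes "if (!c) goto B2", with B1 as the fallthrough.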
+ if (trueSucc == block.ListNext)
+ {
+ Comparison comp = (Comparison)branchOp.GetSource(2).AsInt32();
+ Comparison compInv = comp.Invert();
+
+ branchOp.SetSource(2, Const((int)compInv));
+
+ block.SetSuccessor(0, trueSucc);
+ block.SetSuccessor(1, falseSucc);
+
+ update = true;
+ }
+ }
+ }
+
+ if (update)
+ {
+ cfg.Update();
+ }
+ }
+ }
+}
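The inversion above is only sound if `Comparison.Invert` returns the exact logical negation of the predicate: branching on the inverted condition to the old false successor is then equivalent to falling through into the old true successor. A self-contained sketch of that property, where `Cmp`, `Invert`, and `Eval` are illustrative names rather than ARMeilleure's API:

```csharp
using System;

enum Cmp { Equal, NotEqual, Less, GreaterOrEqual, Greater, LessOrEqual }

static class CmpDemo
{
    // Logical negation of a signed comparison predicate.
    public static Cmp Invert(this Cmp c) => c switch
    {
        Cmp.Equal => Cmp.NotEqual,
        Cmp.NotEqual => Cmp.Equal,
        Cmp.Less => Cmp.GreaterOrEqual,
        Cmp.GreaterOrEqual => Cmp.Less,
        Cmp.Greater => Cmp.LessOrEqual,
        Cmp.LessOrEqual => Cmp.Greater,
        _ => throw new ArgumentOutOfRangeException(nameof(c)),
    };

    public static bool Eval(Cmp c, int x, int y) => c switch
    {
        Cmp.Equal => x == y,
        Cmp.NotEqual => x != y,
        Cmp.Less => x < y,
        Cmp.GreaterOrEqual => x >= y,
        Cmp.Greater => x > y,
        Cmp.LessOrEqual => x <= y,
        _ => false,
    };

    // For all inputs: Eval(c, x, y) == !Eval(c.Invert(), x, y).
}
```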
diff --git a/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
new file mode 100644
index 0000000..be3dff5
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
@@ -0,0 +1,346 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class ConstantFolding
+ {
+ public static void RunPass(Operation operation)
+ {
+ if (operation.Destination == default || operation.SourcesCount == 0)
+ {
+ return;
+ }
+
+ if (!AreAllSourcesConstant(operation))
+ {
+ return;
+ }
+
+ OperandType type = operation.Destination.Type;
+
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ if (operation.GetSource(0).Relocatable ||
+ operation.GetSource(1).Relocatable)
+ {
+ break;
+ }
+
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x + y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x + y);
+ }
+ break;
+
+ case Instruction.BitwiseAnd:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x & y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x & y);
+ }
+ break;
+
+ case Instruction.BitwiseExclusiveOr:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x ^ y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x ^ y);
+ }
+ break;
+
+ case Instruction.BitwiseNot:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => ~x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => ~x);
+ }
+ break;
+
+ case Instruction.BitwiseOr:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x | y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x | y);
+ }
+ break;
+
+ case Instruction.ConvertI64ToI32:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ break;
+
+ case Instruction.Compare:
+ if (type == OperandType.I32 &&
+ operation.GetSource(0).Type == type &&
+ operation.GetSource(1).Type == type)
+ {
+ switch ((Comparison)operation.GetSource(2).Value)
+ {
+ case Comparison.Equal:
+ EvaluateBinaryI32(operation, (x, y) => x == y ? 1 : 0);
+ break;
+ case Comparison.NotEqual:
+ EvaluateBinaryI32(operation, (x, y) => x != y ? 1 : 0);
+ break;
+ case Comparison.Greater:
+ EvaluateBinaryI32(operation, (x, y) => x > y ? 1 : 0);
+ break;
+ case Comparison.LessOrEqual:
+ EvaluateBinaryI32(operation, (x, y) => x <= y ? 1 : 0);
+ break;
+ case Comparison.GreaterUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x > (uint)y ? 1 : 0);
+ break;
+ case Comparison.LessOrEqualUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x <= (uint)y ? 1 : 0);
+ break;
+ case Comparison.GreaterOrEqual:
+ EvaluateBinaryI32(operation, (x, y) => x >= y ? 1 : 0);
+ break;
+ case Comparison.Less:
+ EvaluateBinaryI32(operation, (x, y) => x < y ? 1 : 0);
+ break;
+ case Comparison.GreaterOrEqualUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x >= (uint)y ? 1 : 0);
+ break;
+ case Comparison.LessUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x < (uint)y ? 1 : 0);
+ break;
+ }
+ }
+ break;
+
+ case Instruction.Copy:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => x);
+ }
+ break;
+
+ case Instruction.Divide:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => y != 0 ? x / y : 0);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => y != 0 ? x / y : 0);
+ }
+ break;
+
+ case Instruction.DivideUI:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => y != 0 ? (int)((uint)x / (uint)y) : 0);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => y != 0 ? (long)((ulong)x / (ulong)y) : 0);
+ }
+ break;
+
+ case Instruction.Multiply:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x * y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x * y);
+ }
+ break;
+
+ case Instruction.Negate:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => -x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => -x);
+ }
+ break;
+
+ case Instruction.ShiftLeft:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x << y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x << (int)y);
+ }
+ break;
+
+ case Instruction.ShiftRightSI:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x >> y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x >> (int)y);
+ }
+ break;
+
+ case Instruction.ShiftRightUI:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => (int)((uint)x >> y));
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => (long)((ulong)x >> (int)y));
+ }
+ break;
+
+ case Instruction.SignExtend16:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (short)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (short)x);
+ }
+ break;
+
+ case Instruction.SignExtend32:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (int)x);
+ }
+ break;
+
+ case Instruction.SignExtend8:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (sbyte)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (sbyte)x);
+ }
+ break;
+
+ case Instruction.ZeroExtend16:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (ushort)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (ushort)x);
+ }
+ break;
+
+ case Instruction.ZeroExtend32:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (uint)x);
+ }
+ break;
+
+ case Instruction.ZeroExtend8:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (byte)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (byte)x);
+ }
+ break;
+
+ case Instruction.Subtract:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x - y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x - y);
+ }
+ break;
+ }
+ }
+
+ private static bool AreAllSourcesConstant(Operation operation)
+ {
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ Operand srcOp = operation.GetSource(index);
+
+ if (srcOp.Kind != OperandKind.Constant)
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ private static void EvaluateUnaryI32(Operation operation, Func<int, int> op)
+ {
+ int x = operation.GetSource(0).AsInt32();
+
+ operation.TurnIntoCopy(Const(op(x)));
+ }
+
+ private static void EvaluateUnaryI64(Operation operation, Func<long, long> op)
+ {
+ long x = operation.GetSource(0).AsInt64();
+
+ operation.TurnIntoCopy(Const(op(x)));
+ }
+
+ private static void EvaluateBinaryI32(Operation operation, Func<int, int, int> op)
+ {
+ int x = operation.GetSource(0).AsInt32();
+ int y = operation.GetSource(1).AsInt32();
+
+ operation.TurnIntoCopy(Const(op(x, y)));
+ }
+
+ private static void EvaluateBinaryI64(Operation operation, Func<long, long, long> op)
+ {
+ long x = operation.GetSource(0).AsInt64();
+ long y = operation.GetSource(1).AsInt64();
+
+ operation.TurnIntoCopy(Const(op(x, y)));
+ }
+ }
+}
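One detail worth calling out in the folding rules above: integer division folds to 0 when the divisor is a constant 0, instead of throwing while the pass runs; the guard makes folding total, so compilation never faults on code that would only trap at run time. A tiny standalone illustration (simplified; the real pass rewrites the IR node via `TurnIntoCopy`):

```csharp
using System;

static class FoldDemo
{
    // Mirrors the I32 divide rule above: y != 0 ? x / y : 0.
    static int FoldDivideI32(int x, int y) => y != 0 ? x / y : 0;

    static void Main()
    {
        Console.WriteLine(FoldDivideI32(10, 2)); // 5
        Console.WriteLine(FoldDivideI32(10, 0)); // 0, no fold-time exception
    }
}
```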
diff --git a/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
new file mode 100644
index 0000000..1afc3a7
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
@@ -0,0 +1,252 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class Optimizer
+ {
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+ // Scratch buffer used to store uses.
+ Span<Operation> buffer = default;
+
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (BasicBlock block = cfg.Blocks.Last; block != null; block = block.ListPrevious)
+ {
+ Operation node;
+ Operation prevNode;
+
+ for (node = block.Operations.Last; node != default; node = prevNode)
+ {
+ prevNode = node.ListPrevious;
+
+ if (IsUnused(node))
+ {
+ RemoveNode(block, node);
+
+ modified = true;
+
+ continue;
+ }
+ else if (node.Instruction == Instruction.Phi)
+ {
+ continue;
+ }
+
+ ConstantFolding.RunPass(node);
+ Simplification.RunPass(node);
+
+ if (DestIsSingleLocalVar(node))
+ {
+ if (IsPropagableCompare(node))
+ {
+ modified |= PropagateCompare(ref buffer, node);
+
+ if (modified && IsUnused(node))
+ {
+ RemoveNode(block, node);
+ }
+ }
+ else if (IsPropagableCopy(node))
+ {
+ PropagateCopy(ref buffer, node);
+
+ RemoveNode(block, node);
+
+ modified = true;
+ }
+ }
+ }
+ }
+ }
+ while (modified);
+ }
+
+ public static void RemoveUnusedNodes(ControlFlowGraph cfg)
+ {
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (BasicBlock block = cfg.Blocks.Last; block != null; block = block.ListPrevious)
+ {
+ Operation node;
+ Operation prevNode;
+
+ for (node = block.Operations.Last; node != default; node = prevNode)
+ {
+ prevNode = node.ListPrevious;
+
+ if (IsUnused(node))
+ {
+ RemoveNode(block, node);
+
+ modified = true;
+ }
+ }
+ }
+ }
+ while (modified);
+ }
+
+ private static bool PropagateCompare(ref Span<Operation> buffer, Operation compOp)
+ {
+ // Try to propagate Compare operations into their BranchIf uses, when these BranchIf uses are in the form
+ // of:
+ //
+ // - BranchIf %x, 0x0, Equal ;; i.e. BranchIfFalse %x
+ // - BranchIf %x, 0x0, NotEqual ;; i.e. BranchIfTrue %x
+ //
+ // The commutative property of Equal and NotEqual is taken into consideration as well.
+ //
+ // For example:
+ //
+ // %x = Compare %a, %b, comp
+ // BranchIf %x, 0x0, NotEqual
+ //
+ // =>
+ //
+ // BranchIf %a, %b, comp
+
+ static bool IsZeroBranch(Operation operation, out Comparison compType)
+ {
+ compType = Comparison.Equal;
+
+ if (operation.Instruction != Instruction.BranchIf)
+ {
+ return false;
+ }
+
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand comp = operation.GetSource(2);
+
+ compType = (Comparison)comp.AsInt32();
+
+ return (src1.Kind == OperandKind.Constant && src1.Value == 0) ||
+ (src2.Kind == OperandKind.Constant && src2.Value == 0);
+ }
+
+ bool modified = false;
+
+ Operand dest = compOp.Destination;
+ Operand src1 = compOp.GetSource(0);
+ Operand src2 = compOp.GetSource(1);
+ Operand comp = compOp.GetSource(2);
+
+ Comparison compType = (Comparison)comp.AsInt32();
+
+ Span<Operation> uses = dest.GetUses(ref buffer);
+
+ foreach (Operation use in uses)
+ {
+ // If operation is a BranchIf and has a constant value 0 in its RHS or LHS source operands.
+ if (IsZeroBranch(use, out Comparison otherCompType))
+ {
+ Comparison propCompType;
+
+ if (otherCompType == Comparison.NotEqual)
+ {
+ propCompType = compType;
+ }
+ else if (otherCompType == Comparison.Equal)
+ {
+ propCompType = compType.Invert();
+ }
+ else
+ {
+ continue;
+ }
+
+ use.SetSource(0, src1);
+ use.SetSource(1, src2);
+ use.SetSource(2, Const((int)propCompType));
+
+ modified = true;
+ }
+ }
+
+ return modified;
+ }
+
+ private static void PropagateCopy(ref Span<Operation> buffer, Operation copyOp)
+ {
+ // Propagate copy source operand to all uses of the destination operand.
+ Operand dest = copyOp.Destination;
+ Operand source = copyOp.GetSource(0);
+
+ Span<Operation> uses = dest.GetUses(ref buffer);
+
+ foreach (Operation use in uses)
+ {
+ for (int index = 0; index < use.SourcesCount; index++)
+ {
+ if (use.GetSource(index) == dest)
+ {
+ use.SetSource(index, source);
+ }
+ }
+ }
+ }
+
+ private static void RemoveNode(BasicBlock block, Operation node)
+ {
+ // Remove the node from the block's operation list, and remove it
+ // from the use lists of all the operands that it uses.
+ block.Operations.Remove(node);
+
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ node.SetSource(index, default);
+ }
+
+ Debug.Assert(node.Destination == default || node.Destination.UsesCount == 0);
+
+ node.Destination = default;
+ }
+
+ private static bool IsUnused(Operation node)
+ {
+ return DestIsSingleLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node);
+ }
+
+ private static bool DestIsSingleLocalVar(Operation node)
+ {
+ return node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable;
+ }
+
+ private static bool HasSideEffects(Operation node)
+ {
+ return node.Instruction == Instruction.Call
+ || node.Instruction == Instruction.Tailcall
+ || node.Instruction == Instruction.CompareAndSwap
+ || node.Instruction == Instruction.CompareAndSwap16
+ || node.Instruction == Instruction.CompareAndSwap8;
+ }
+
+ private static bool IsPropagableCompare(Operation operation)
+ {
+ return operation.Instruction == Instruction.Compare;
+ }
+
+ private static bool IsPropagableCopy(Operation operation)
+ {
+ if (operation.Instruction != Instruction.Copy)
+ {
+ return false;
+ }
+
+ return operation.Destination.Type == operation.GetSource(0).Type;
+ }
+ }
+}
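The copy-propagation step above is the standard rewrite-all-uses transformation: every read of the copy's destination is redirected to the copy's source, after which the copy itself is dead and `RemoveNode` deletes it. Sketched on a toy def-use structure, with illustrative types only, not ARMeilleure's IR:

```csharp
using System.Collections.Generic;

sealed class ToyOp
{
    public string Name = "";
    public List<string> Sources = new();
}

static class CopyPropDemo
{
    // Rewrite every use of `dest` to read `source` instead, like
    // Optimizer.PropagateCopy does over Operand use lists.
    public static void PropagateCopy(string dest, string source, IEnumerable<ToyOp> uses)
    {
        foreach (ToyOp use in uses)
        {
            for (int i = 0; i < use.Sources.Count; i++)
            {
                if (use.Sources[i] == dest)
                {
                    use.Sources[i] = source;
                }
            }
        }
    }
}
```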
diff --git a/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs b/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs
new file mode 100644
index 0000000..53a7f3e
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs
@@ -0,0 +1,182 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class Simplification
+ {
+ public static void RunPass(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ if (operation.GetSource(0).Relocatable ||
+ operation.GetSource(1).Relocatable)
+ {
+ break;
+ }
+
+ TryEliminateBinaryOpCommutative(operation, 0);
+ break;
+
+ case Instruction.BitwiseAnd:
+ TryEliminateBitwiseAnd(operation);
+ break;
+
+ case Instruction.BitwiseOr:
+ TryEliminateBitwiseOr(operation);
+ break;
+
+ case Instruction.BitwiseExclusiveOr:
+ TryEliminateBitwiseExclusiveOr(operation);
+ break;
+
+ case Instruction.ConditionalSelect:
+ TryEliminateConditionalSelect(operation);
+ break;
+
+ case Instruction.Divide:
+ TryEliminateBinaryOpY(operation, 1);
+ break;
+
+ case Instruction.Multiply:
+ TryEliminateBinaryOpCommutative(operation, 1);
+ break;
+
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ case Instruction.Subtract:
+ TryEliminateBinaryOpY(operation, 0);
+ break;
+ }
+ }
+
+ private static void TryEliminateBitwiseAnd(Operation operation)
+ {
+ // Try to recognize and optimize these patterns (in order):
+ // x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
+ // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(x, AllOnes(x.Type)))
+ {
+ operation.TurnIntoCopy(y);
+ }
+ else if (IsConstEqual(y, AllOnes(y.Type)))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ else if (IsConstEqual(x, 0) || IsConstEqual(y, 0))
+ {
+ operation.TurnIntoCopy(Const(x.Type, 0));
+ }
+ }
+
+ private static void TryEliminateBitwiseOr(Operation operation)
+ {
+ // Try to recognize and optimize these patterns (in order):
+ // x | 0x00000000 == x, 0x00000000 | y == y,
+ // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(x, 0))
+ {
+ operation.TurnIntoCopy(y);
+ }
+ else if (IsConstEqual(y, 0))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ else if (IsConstEqual(x, AllOnes(x.Type)) || IsConstEqual(y, AllOnes(y.Type)))
+ {
+ operation.TurnIntoCopy(Const(AllOnes(x.Type)));
+ }
+ }
+
+ private static void TryEliminateBitwiseExclusiveOr(Operation operation)
+ {
+ // Try to recognize and optimize these patterns (in order):
+ // x ^ y == 0x00000000 when x == y
+ // 0x00000000 ^ y == y, x ^ 0x00000000 == x
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (x == y && x.Type.IsInteger())
+ {
+ operation.TurnIntoCopy(Const(x.Type, 0));
+ }
+ else
+ {
+ TryEliminateBinaryOpCommutative(operation, 0);
+ }
+ }
+
+ private static void TryEliminateBinaryOpY(Operation operation, ulong comparand)
+ {
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(y, comparand))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ }
+
+ private static void TryEliminateBinaryOpCommutative(Operation operation, ulong comparand)
+ {
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(x, comparand))
+ {
+ operation.TurnIntoCopy(y);
+ }
+ else if (IsConstEqual(y, comparand))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ }
+
+ private static void TryEliminateConditionalSelect(Operation operation)
+ {
+ Operand cond = operation.GetSource(0);
+
+ if (cond.Kind != OperandKind.Constant)
+ {
+ return;
+ }
+
+ // The condition is constant, so we can turn the select into a copy,
+ // choosing the source based on the condition value.
+ int srcIndex = cond.Value != 0 ? 1 : 2;
+
+ Operand source = operation.GetSource(srcIndex);
+
+ operation.TurnIntoCopy(source);
+ }
+
+ private static bool IsConstEqual(Operand operand, ulong comparand)
+ {
+ if (operand.Kind != OperandKind.Constant || !operand.Type.IsInteger())
+ {
+ return false;
+ }
+
+ return operand.Value == comparand;
+ }
+
+ private static ulong AllOnes(OperandType type)
+ {
+ return type switch
+ {
+ OperandType.I32 => ~0U,
+ OperandType.I64 => ~0UL,
+ _ => throw new ArgumentException("Invalid operand type \"" + type + "\"."),
+ };
+ }
+ }
+}
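The rewrites above all rest on textbook bitwise and arithmetic identities; for I32, -1 is the all-ones pattern, so the checks below (plain C#, independent of the IR) spell out exactly what each elimination assumes:

```csharp
using System.Diagnostics;

static class IdentityDemo
{
    public static void Check(int x)
    {
        Debug.Assert((x & -1) == x);   // x & 0xFFFFFFFF == x
        Debug.Assert((x & 0) == 0);    // x & 0 == 0
        Debug.Assert((x | 0) == x);    // x | 0 == x
        Debug.Assert((x | -1) == -1);  // x | 0xFFFFFFFF == 0xFFFFFFFF
        Debug.Assert((x ^ x) == 0);    // x ^ x == 0
        Debug.Assert((x ^ 0) == x);    // x ^ 0 == x
        Debug.Assert((x * 1) == x);    // multiply by identity
        Debug.Assert((x / 1) == x);    // divide by identity
        Debug.Assert((x - 0) == x);    // subtract identity
        Debug.Assert((x << 0) == x);   // shift by zero
    }
}
```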
diff --git a/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs b/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs
new file mode 100644
index 0000000..e63c4da
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs
@@ -0,0 +1,83 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class TailMerge
+ {
+ public static void RunPass(in CompilerContext cctx)
+ {
+ ControlFlowGraph cfg = cctx.Cfg;
+
+ BasicBlock mergedReturn = new(cfg.Blocks.Count);
+
+ Operand returnValue;
+ Operation returnOp;
+
+ if (cctx.FuncReturnType == OperandType.None)
+ {
+ returnValue = default;
+ returnOp = Operation(Instruction.Return, default);
+ }
+ else
+ {
+ returnValue = cfg.AllocateLocal(cctx.FuncReturnType);
+ returnOp = Operation(Instruction.Return, default, returnValue);
+ }
+
+ mergedReturn.Frequency = BasicBlockFrequency.Cold;
+ mergedReturn.Operations.AddLast(returnOp);
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ Operation op = block.Operations.Last;
+
+ if (op != default && op.Instruction == Instruction.Return)
+ {
+ block.Operations.Remove(op);
+
+ if (cctx.FuncReturnType == OperandType.None)
+ {
+ PrepareMerge(block, mergedReturn);
+ }
+ else
+ {
+ Operation copyOp = Operation(Instruction.Copy, returnValue, op.GetSource(0));
+
+ PrepareMerge(block, mergedReturn).Append(copyOp);
+ }
+ }
+ }
+
+ cfg.Blocks.AddLast(mergedReturn);
+ cfg.Update();
+ }
+
+ private static BasicBlock PrepareMerge(BasicBlock from, BasicBlock to)
+ {
+ BasicBlock fromPred = from.Predecessors.Count == 1 ? from.Predecessors[0] : null;
+
+ // If the block is empty, we can try to append to the predecessor and avoid unnecessary jumps.
+ if (from.Operations.Count == 0 && fromPred != null && fromPred.SuccessorsCount == 1)
+ {
+ for (int i = 0; i < fromPred.SuccessorsCount; i++)
+ {
+ if (fromPred.GetSuccessor(i) == from)
+ {
+ fromPred.SetSuccessor(i, to);
+ }
+ }
+
+ // NOTE: `from` becomes unreachable and the call to `cfg.Update()` will remove it.
+ return fromPred;
+ }
+ else
+ {
+ from.AddSuccessor(to);
+
+ return from;
+ }
+ }
+ }
+}
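The net effect: every block that ended in `Return` now writes its result into one shared return variable and branches to a single cold merged-return block (for void functions the copy is skipped). Sketched in pseudo-IR comments, with illustrative notation rather than real ARMeilleure syntax:

```csharp
// Before:                      After:
//   block0: ...                  block0: ...
//           Return %a                    %ret = Copy %a
//   block1: ...                          Branch merged
//           Return %b            block1: ...
//                                        %ret = Copy %b
//                                        Branch merged
//                                merged (cold):
//                                        Return %ret
```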
diff --git a/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs b/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs
new file mode 100644
index 0000000..53f279f
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs
@@ -0,0 +1,57 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen
+{
+ static class PreAllocatorCommon
+ {
+ public static void Propagate(ref Span<Operation> buffer, Operand dest, Operand value)
+ {
+ ReadOnlySpan<Operation> uses = dest.GetUses(ref buffer);
+
+ foreach (Operation use in uses)
+ {
+ for (int srcIndex = 0; srcIndex < use.SourcesCount; srcIndex++)
+ {
+ Operand useSrc = use.GetSource(srcIndex);
+
+ if (useSrc == dest)
+ {
+ use.SetSource(srcIndex, value);
+ }
+ else if (useSrc.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memoryOp = useSrc.GetMemory();
+
+ Operand baseAddr = memoryOp.BaseAddress;
+ Operand index = memoryOp.Index;
+ bool changed = false;
+
+ if (baseAddr == dest)
+ {
+ baseAddr = value;
+ changed = true;
+ }
+
+ if (index == dest)
+ {
+ index = value;
+ changed = true;
+ }
+
+ if (changed)
+ {
+ use.SetSource(srcIndex, MemoryOp(
+ useSrc.Type,
+ baseAddr,
+ index,
+ memoryOp.Scale,
+ memoryOp.Displacement));
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
new file mode 100644
index 0000000..7b9c2f7
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ readonly struct AllocationResult
+ {
+ public int IntUsedRegisters { get; }
+ public int VecUsedRegisters { get; }
+ public int SpillRegionSize { get; }
+
+ public AllocationResult(
+ int intUsedRegisters,
+ int vecUsedRegisters,
+ int spillRegionSize)
+ {
+ IntUsedRegisters = intUsedRegisters;
+ VecUsedRegisters = vecUsedRegisters;
+ SpillRegionSize = spillRegionSize;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
new file mode 100644
index 0000000..af10330
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
@@ -0,0 +1,249 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class CopyResolver
+ {
+ private class ParallelCopy
+ {
+ private readonly struct Copy
+ {
+ public Register Dest { get; }
+ public Register Source { get; }
+
+ public OperandType Type { get; }
+
+ public Copy(Register dest, Register source, OperandType type)
+ {
+ Dest = dest;
+ Source = source;
+ Type = type;
+ }
+ }
+
+ private readonly List<Copy> _copies;
+
+ public int Count => _copies.Count;
+
+ public ParallelCopy()
+ {
+ _copies = new List<Copy>();
+ }
+
+ public void AddCopy(Register dest, Register source, OperandType type)
+ {
+ _copies.Add(new Copy(dest, source, type));
+ }
+
+ public void Sequence(List<Operation> sequence)
+ {
+ Dictionary<Register, Register> locations = new();
+ Dictionary<Register, Register> sources = new();
+
+ Dictionary<Register, OperandType> types = new();
+
+ Queue<Register> pendingQueue = new();
+ Queue<Register> readyQueue = new();
+
+ foreach (Copy copy in _copies)
+ {
+ locations[copy.Source] = copy.Source;
+ sources[copy.Dest] = copy.Source;
+ types[copy.Dest] = copy.Type;
+
+ pendingQueue.Enqueue(copy.Dest);
+ }
+
+ foreach (Copy copy in _copies)
+ {
+ // If the destination is not the source of any other copy, we can assign it immediately.
+ if (!locations.ContainsKey(copy.Dest))
+ {
+ readyQueue.Enqueue(copy.Dest);
+ }
+ }
+
+ while (pendingQueue.TryDequeue(out Register current))
+ {
+ Register copyDest;
+ Register origSource;
+ Register copySource;
+
+ while (readyQueue.TryDequeue(out copyDest))
+ {
+ origSource = sources[copyDest];
+ copySource = locations[origSource];
+
+ OperandType type = types[copyDest];
+
+ EmitCopy(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+ locations[origSource] = copyDest;
+
+ if (origSource == copySource && sources.ContainsKey(origSource))
+ {
+ readyQueue.Enqueue(origSource);
+ }
+ }
+
+ copyDest = current;
+ origSource = sources[copyDest];
+ copySource = locations[origSource];
+
+ if (copyDest != copySource)
+ {
+ OperandType type = types[copyDest];
+
+ type = type.IsInteger() ? OperandType.I64 : OperandType.V128;
+
+ EmitXorSwap(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+ locations[origSource] = copyDest;
+
+ Register swapOther = copySource;
+
+ if (copyDest != locations[sources[copySource]])
+ {
+ // Find the other swap destination register.
+ // To do that, we search all the pending registers, and pick
+ // the one where the copy source register is equal to the
+ // current destination register being processed (copyDest).
+ foreach (Register pending in pendingQueue)
+ {
+ // Is this a copy of pending <- copyDest?
+ if (copyDest == locations[sources[pending]])
+ {
+ swapOther = pending;
+
+ break;
+ }
+ }
+ }
+
+ // The value that was previously at "copyDest" now lives on
+ // "copySource" thanks to the swap; we now need to update the
+ // location for the next copy that is supposed to copy the value
+ // that used to live on "copyDest".
+ locations[sources[swapOther]] = copySource;
+ }
+ }
+ }
+
+ private static void EmitCopy(List<Operation> sequence, Operand x, Operand y)
+ {
+ sequence.Add(Operation(Instruction.Copy, x, y));
+ }
+
+ private static void EmitXorSwap(List<Operation> sequence, Operand x, Operand y)
+ {
+ sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y));
+ sequence.Add(Operation(Instruction.BitwiseExclusiveOr, y, y, x));
+ sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y));
+ }
+ }
+
+ private Queue<Operation> _fillQueue = null;
+ private Queue<Operation> _spillQueue = null;
+ private ParallelCopy _parallelCopy = null;
+
+ public bool HasCopy { get; private set; }
+
+ public void AddSplit(LiveInterval left, LiveInterval right)
+ {
+ if (left.Local != right.Local)
+ {
+ throw new ArgumentException("Intervals of different variables are not allowed.");
+ }
+
+ OperandType type = left.Local.Type;
+
+ if (left.IsSpilled && !right.IsSpilled)
+ {
+ // Move from the stack to a register.
+ AddSplitFill(left, right, type);
+ }
+ else if (!left.IsSpilled && right.IsSpilled)
+ {
+ // Move from a register to the stack.
+ AddSplitSpill(left, right, type);
+ }
+ else if (!left.IsSpilled && !right.IsSpilled && left.Register != right.Register)
+ {
+ // Move from one register to another.
+ AddSplitCopy(left, right, type);
+ }
+ else if (left.SpillOffset != right.SpillOffset)
+ {
+ // This would be the stack-to-stack move case, but this is not supported.
+ throw new ArgumentException("Both intervals were spilled.");
+ }
+ }
+
+ private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type)
+ {
+ _fillQueue ??= new Queue<Operation>();
+
+ Operand register = GetRegister(right.Register, type);
+ Operand offset = Const(left.SpillOffset);
+
+ _fillQueue.Enqueue(Operation(Instruction.Fill, register, offset));
+
+ HasCopy = true;
+ }
+
+ private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type)
+ {
+ _spillQueue ??= new Queue<Operation>();
+
+ Operand offset = Const(right.SpillOffset);
+ Operand register = GetRegister(left.Register, type);
+
+ _spillQueue.Enqueue(Operation(Instruction.Spill, default, offset, register));
+
+ HasCopy = true;
+ }
+
+ private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type)
+ {
+ _parallelCopy ??= new ParallelCopy();
+
+ _parallelCopy.AddCopy(right.Register, left.Register, type);
+
+ HasCopy = true;
+ }
+
+ public Operation[] Sequence()
+ {
+ List<Operation> sequence = new();
+
+ if (_spillQueue != null)
+ {
+ while (_spillQueue.TryDequeue(out Operation spillOp))
+ {
+ sequence.Add(spillOp);
+ }
+ }
+
+ _parallelCopy?.Sequence(sequence);
+
+ if (_fillQueue != null)
+ {
+ while (_fillQueue.TryDequeue(out Operation fillOp))
+ {
+ sequence.Add(fillOp);
+ }
+ }
+
+ return sequence.ToArray();
+ }
+
+ private static Operand GetRegister(Register reg, OperandType type)
+ {
+ return Register(reg.Index, reg.Type, type);
+ }
+ }
+}
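`EmitXorSwap` above is the classic three-XOR exchange, used so that a register-to-register swap inside a copy cycle needs no scratch register. A hedged standalone sketch on plain integers:

```csharp
static class XorSwapDemo
{
    // Exchanges x and y with three XORs and no temporary, the same
    // instruction sequence EmitXorSwap emits for a register cycle.
    public static void XorSwap(ref ulong x, ref ulong y)
    {
        x ^= y; // x = a ^ b
        y ^= x; // y = b ^ (a ^ b) = a
        x ^= y; // x = (a ^ b) ^ a = b
    }
}
```

The trick only degenerates (both values end up 0) when `x` and `y` alias the same storage, which two distinct registers never do, so it is safe in this context.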
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
new file mode 100644
index 0000000..5f1d6ce
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
@@ -0,0 +1,454 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class HybridAllocator : IRegisterAllocator
+ {
+ private readonly struct BlockInfo
+ {
+ public bool HasCall { get; }
+
+ public int IntFixedRegisters { get; }
+ public int VecFixedRegisters { get; }
+
+ public BlockInfo(bool hasCall, int intFixedRegisters, int vecFixedRegisters)
+ {
+ HasCall = hasCall;
+ IntFixedRegisters = intFixedRegisters;
+ VecFixedRegisters = vecFixedRegisters;
+ }
+ }
+
+ private struct LocalInfo
+ {
+ public int Uses { get; set; }
+ public int UsesAllocated { get; set; }
+ public int Sequence { get; set; }
+ public Operand Temp { get; set; }
+ public Operand Register { get; set; }
+ public Operand SpillOffset { get; set; }
+ public OperandType Type { get; }
+
+ private int _first;
+ private int _last;
+
+ public readonly bool IsBlockLocal => _first == _last;
+
+ public LocalInfo(OperandType type, int uses, int blkIndex)
+ {
+ Uses = uses;
+ Type = type;
+
+ UsesAllocated = 0;
+ Sequence = 0;
+ Temp = default;
+ Register = default;
+ SpillOffset = default;
+
+ _first = -1;
+ _last = -1;
+
+ SetBlockIndex(blkIndex);
+ }
+
+ public void SetBlockIndex(int blkIndex)
+ {
+ if (_first == -1 || blkIndex < _first)
+ {
+ _first = blkIndex;
+ }
+
+ if (_last == -1 || blkIndex > _last)
+ {
+ _last = blkIndex;
+ }
+ }
+ }
+
+ private const int MaxIROperands = 4;
+ // The "visited" state is stored in the MSB of the local's value.
+ private const ulong VisitedMask = 1ul << 63;
+
+ private BlockInfo[] _blockInfo;
+ private LocalInfo[] _localInfo;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static bool IsVisited(Operand local)
+ {
+ Debug.Assert(local.Kind == OperandKind.LocalVariable);
+
+ return (local.GetValueUnsafe() & VisitedMask) != 0;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void SetVisited(Operand local)
+ {
+ Debug.Assert(local.Kind == OperandKind.LocalVariable);
+
+ local.GetValueUnsafe() |= VisitedMask;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private ref LocalInfo GetLocalInfo(Operand local)
+ {
+ Debug.Assert(local.Kind == OperandKind.LocalVariable);
+ Debug.Assert(IsVisited(local), "Local variable not visited. Used before defined?");
+
+ return ref _localInfo[(uint)local.GetValueUnsafe() - 1];
+ }
+
+ public AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks)
+ {
+ int intUsedRegisters = 0;
+ int vecUsedRegisters = 0;
+
+ int intFreeRegisters = regMasks.IntAvailableRegisters;
+ int vecFreeRegisters = regMasks.VecAvailableRegisters;
+
+ _blockInfo = new BlockInfo[cfg.Blocks.Count];
+ _localInfo = new LocalInfo[cfg.Blocks.Count * 3];
+
+ int localInfoCount = 0;
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ int intFixedRegisters = 0;
+ int vecFixedRegisters = 0;
+
+ bool hasCall = false;
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ if (node.Instruction == Instruction.Call)
+ {
+ hasCall = true;
+ }
+
+ foreach (Operand source in node.SourcesUnsafe)
+ {
+ if (source.Kind == OperandKind.LocalVariable)
+ {
+ GetLocalInfo(source).SetBlockIndex(block.Index);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ GetLocalInfo(memOp.BaseAddress).SetBlockIndex(block.Index);
+ }
+
+ if (memOp.Index != default)
+ {
+ GetLocalInfo(memOp.Index).SetBlockIndex(block.Index);
+ }
+ }
+ }
+
+ foreach (Operand dest in node.DestinationsUnsafe)
+ {
+ if (dest.Kind == OperandKind.LocalVariable)
+ {
+ if (IsVisited(dest))
+ {
+ GetLocalInfo(dest).SetBlockIndex(block.Index);
+ }
+ else
+ {
+ dest.NumberLocal(++localInfoCount);
+
+ if (localInfoCount > _localInfo.Length)
+ {
+ Array.Resize(ref _localInfo, localInfoCount * 2);
+ }
+
+ SetVisited(dest);
+ GetLocalInfo(dest) = new LocalInfo(dest.Type, UsesCount(dest), block.Index);
+ }
+ }
+ else if (dest.Kind == OperandKind.Register)
+ {
+ if (dest.Type.IsInteger())
+ {
+ intFixedRegisters |= 1 << dest.GetRegister().Index;
+ }
+ else
+ {
+ vecFixedRegisters |= 1 << dest.GetRegister().Index;
+ }
+ }
+ }
+ }
+
+ _blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
+ }
+
+ int sequence = 0;
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ ref BlockInfo blkInfo = ref _blockInfo[block.Index];
+
+ int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters;
+ int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters;
+
+ int intCallerSavedRegisters = blkInfo.HasCall ? regMasks.IntCallerSavedRegisters : 0;
+ int vecCallerSavedRegisters = blkInfo.HasCall ? regMasks.VecCallerSavedRegisters : 0;
+
+ int intSpillTempRegisters = SelectSpillTemps(
+ intCallerSavedRegisters & ~blkInfo.IntFixedRegisters,
+ intLocalFreeRegisters);
+ int vecSpillTempRegisters = SelectSpillTemps(
+ vecCallerSavedRegisters & ~blkInfo.VecFixedRegisters,
+ vecLocalFreeRegisters);
+
+ intLocalFreeRegisters &= ~(intSpillTempRegisters | intCallerSavedRegisters);
+ vecLocalFreeRegisters &= ~(vecSpillTempRegisters | vecCallerSavedRegisters);
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ int intLocalUse = 0;
+ int vecLocalUse = 0;
+
+ Operand AllocateRegister(Operand local)
+ {
+ ref LocalInfo info = ref GetLocalInfo(local);
+
+ info.UsesAllocated++;
+
+ Debug.Assert(info.UsesAllocated <= info.Uses);
+
+ if (info.Register != default)
+ {
+ if (info.UsesAllocated == info.Uses)
+ {
+ Register reg = info.Register.GetRegister();
+
+ if (local.Type.IsInteger())
+ {
+ intLocalFreeRegisters |= 1 << reg.Index;
+ }
+ else
+ {
+ vecLocalFreeRegisters |= 1 << reg.Index;
+ }
+ }
+
+ return info.Register;
+ }
+ else
+ {
+ Operand temp = info.Temp;
+
+ if (temp == default || info.Sequence != sequence)
+ {
+ temp = local.Type.IsInteger()
+ ? GetSpillTemp(local, intSpillTempRegisters, ref intLocalUse)
+ : GetSpillTemp(local, vecSpillTempRegisters, ref vecLocalUse);
+
+ info.Sequence = sequence;
+ info.Temp = temp;
+ }
+
+ Operation fillOp = Operation(Instruction.Fill, temp, info.SpillOffset);
+
+ block.Operations.AddBefore(node, fillOp);
+
+ return temp;
+ }
+ }
+
+ bool folded = false;
+
+ // If operation is a copy of a local and that local is living on the stack, we turn the copy into
+ // a fill, instead of inserting a fill before it.
+ if (node.Instruction == Instruction.Copy)
+ {
+ Operand source = node.GetSource(0);
+
+ if (source.Kind == OperandKind.LocalVariable)
+ {
+ ref LocalInfo info = ref GetLocalInfo(source);
+
+ if (info.Register == default)
+ {
+ Operation fillOp = Operation(Instruction.Fill, node.Destination, info.SpillOffset);
+
+ block.Operations.AddBefore(node, fillOp);
+ block.Operations.Remove(node);
+
+ node = fillOp;
+
+ folded = true;
+ }
+ }
+ }
+
+ if (!folded)
+ {
+ foreach (ref Operand source in node.SourcesUnsafe)
+ {
+ if (source.Kind == OperandKind.LocalVariable)
+ {
+ source = AllocateRegister(source);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ memOp.BaseAddress = AllocateRegister(memOp.BaseAddress);
+ }
+
+ if (memOp.Index != default)
+ {
+ memOp.Index = AllocateRegister(memOp.Index);
+ }
+ }
+ }
+ }
+
+ int intLocalAsg = 0;
+ int vecLocalAsg = 0;
+
+ foreach (ref Operand dest in node.DestinationsUnsafe)
+ {
+ if (dest.Kind != OperandKind.LocalVariable)
+ {
+ continue;
+ }
+
+ ref LocalInfo info = ref GetLocalInfo(dest);
+
+ if (info.UsesAllocated == 0)
+ {
+ int mask = dest.Type.IsInteger()
+ ? intLocalFreeRegisters
+ : vecLocalFreeRegisters;
+
+ if (info.IsBlockLocal && mask != 0)
+ {
+ int selectedReg = BitOperations.TrailingZeroCount(mask);
+
+ info.Register = Register(selectedReg, info.Type.ToRegisterType(), info.Type);
+
+ if (dest.Type.IsInteger())
+ {
+ intLocalFreeRegisters &= ~(1 << selectedReg);
+ intUsedRegisters |= 1 << selectedReg;
+ }
+ else
+ {
+ vecLocalFreeRegisters &= ~(1 << selectedReg);
+ vecUsedRegisters |= 1 << selectedReg;
+ }
+ }
+ else
+ {
+ info.Register = default;
+ info.SpillOffset = Const(stackAlloc.Allocate(dest.Type.GetSizeInBytes()));
+ }
+ }
+
+ info.UsesAllocated++;
+
+ Debug.Assert(info.UsesAllocated <= info.Uses);
+
+ if (info.Register != default)
+ {
+ dest = info.Register;
+ }
+ else
+ {
+ Operand temp = info.Temp;
+
+ if (temp == default || info.Sequence != sequence)
+ {
+ temp = dest.Type.IsInteger()
+ ? GetSpillTemp(dest, intSpillTempRegisters, ref intLocalAsg)
+ : GetSpillTemp(dest, vecSpillTempRegisters, ref vecLocalAsg);
+
+ info.Sequence = sequence;
+ info.Temp = temp;
+ }
+
+ dest = temp;
+
+ Operation spillOp = Operation(Instruction.Spill, default, info.SpillOffset, temp);
+
+ block.Operations.AddAfter(node, spillOp);
+
+ node = spillOp;
+ }
+ }
+
+ sequence++;
+
+ intUsedRegisters |= intLocalAsg | intLocalUse;
+ vecUsedRegisters |= vecLocalAsg | vecLocalUse;
+ }
+ }
+
+ return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize);
+ }
+
+ private static int SelectSpillTemps(int mask0, int mask1)
+ {
+ int selection = 0;
+ int count = 0;
+
+ while (count < MaxIROperands && mask0 != 0)
+ {
+ int mask = mask0 & -mask0;
+
+ selection |= mask;
+
+ mask0 &= ~mask;
+
+ count++;
+ }
+
+ while (count < MaxIROperands && mask1 != 0)
+ {
+ int mask = mask1 & -mask1;
+
+ selection |= mask;
+
+ mask1 &= ~mask;
+
+ count++;
+ }
+
+ Debug.Assert(count == MaxIROperands, "Not enough registers for spill temps.");
+
+ return selection;
+ }
+
+ private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask)
+ {
+ int selectedReg = BitOperations.TrailingZeroCount(freeMask & ~useMask);
+
+ useMask |= 1 << selectedReg;
+
+ return Register(selectedReg, local.Type.ToRegisterType(), local.Type);
+ }
+
+ private static int UsesCount(Operand local)
+ {
+ return local.AssignmentsCount + local.UsesCount;
+ }
+ }
+}
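`SelectSpillTemps` and `GetSpillTemp` above lean on two small bit tricks: `mask & -mask` isolates the lowest set bit of an availability mask, and `BitOperations.TrailingZeroCount` turns that bit position into a register index. In isolation:

```csharp
using System.Numerics;

static class MaskDemo
{
    static void Main()
    {
        int mask = 0b_0101_1000;                           // registers 3, 4, 6 are free

        int lowest = mask & -mask;                         // 0b_0000_1000: lowest free register bit
        int index = BitOperations.TrailingZeroCount(mask); // 3: its register index

        mask &= ~lowest;                                   // 0b_0101_0000: mark it consumed

        System.Console.WriteLine((lowest, index, mask));   // (8, 3, 80)
    }
}
```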
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
new file mode 100644
index 0000000..7d4ce2e
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
@@ -0,0 +1,12 @@
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ interface IRegisterAllocator
+ {
+ AllocationResult RunPass(
+ ControlFlowGraph cfg,
+ StackAllocator stackAlloc,
+ RegisterMasks regMasks);
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
new file mode 100644
index 0000000..16feeb9
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
@@ -0,0 +1,1127 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Numerics;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Based on:
+ // "Linear Scan Register Allocation for the Java(tm) HotSpot Client Compiler".
+ // http://www.christianwimmer.at/Publications/Wimmer04a/Wimmer04a.pdf
+ class LinearScanAllocator : IRegisterAllocator
+ {
+ private const int InstructionGap = 2;
+ private const int InstructionGapMask = InstructionGap - 1;
+
+ private HashSet<int> _blockEdges;
+ private LiveRange[] _blockRanges;
+ private BitMap[] _blockLiveIn;
+
+ private List<LiveInterval> _intervals;
+ private LiveInterval[] _parentIntervals;
+
+ private List<(IntrusiveList<Operation>, Operation)> _operationNodes;
+ private int _operationsCount;
+
+ private class AllocationContext
+ {
+ public RegisterMasks Masks { get; }
+
+ public StackAllocator StackAlloc { get; }
+
+ public BitMap Active { get; }
+ public BitMap Inactive { get; }
+
+ public int IntUsedRegisters { get; set; }
+ public int VecUsedRegisters { get; set; }
+
+ private readonly int[] _intFreePositions;
+ private readonly int[] _vecFreePositions;
+ private readonly int _intFreePositionsCount;
+ private readonly int _vecFreePositionsCount;
+
+ public AllocationContext(StackAllocator stackAlloc, RegisterMasks masks, int intervalsCount)
+ {
+ StackAlloc = stackAlloc;
+ Masks = masks;
+
+ Active = new BitMap(Allocators.Default, intervalsCount);
+ Inactive = new BitMap(Allocators.Default, intervalsCount);
+
+ PopulateFreePositions(RegisterType.Integer, out _intFreePositions, out _intFreePositionsCount);
+ PopulateFreePositions(RegisterType.Vector, out _vecFreePositions, out _vecFreePositionsCount);
+
+ void PopulateFreePositions(RegisterType type, out int[] positions, out int count)
+ {
+ positions = new int[masks.RegistersCount];
+ count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type));
+
+ int mask = masks.GetAvailableRegisters(type);
+
+ for (int i = 0; i < positions.Length; i++)
+ {
+ if ((mask & (1 << i)) != 0)
+ {
+ positions[i] = int.MaxValue;
+ }
+ }
+ }
+ }
+
+ public void GetFreePositions(RegisterType type, in Span<int> positions, out int count)
+ {
+ if (type == RegisterType.Integer)
+ {
+ _intFreePositions.CopyTo(positions);
+
+ count = _intFreePositionsCount;
+ }
+ else
+ {
+ Debug.Assert(type == RegisterType.Vector);
+
+ _vecFreePositions.CopyTo(positions);
+
+ count = _vecFreePositionsCount;
+ }
+ }
+
+ public void MoveActiveToInactive(int bit)
+ {
+ Move(Active, Inactive, bit);
+ }
+
+ public void MoveInactiveToActive(int bit)
+ {
+ Move(Inactive, Active, bit);
+ }
+
+ private static void Move(BitMap source, BitMap dest, int bit)
+ {
+ source.Clear(bit);
+
+ dest.Set(bit);
+ }
+ }
+
+ public AllocationResult RunPass(
+ ControlFlowGraph cfg,
+ StackAllocator stackAlloc,
+ RegisterMasks regMasks)
+ {
+ NumberLocals(cfg, regMasks.RegistersCount);
+
+ var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);
+
+ BuildIntervals(cfg, context);
+
+ for (int index = 0; index < _intervals.Count; index++)
+ {
+ LiveInterval current = _intervals[index];
+
+ if (current.IsEmpty)
+ {
+ continue;
+ }
+
+ if (current.IsFixed)
+ {
+ context.Active.Set(index);
+
+ if (current.IsFixedAndUsed)
+ {
+ if (current.Register.Type == RegisterType.Integer)
+ {
+ context.IntUsedRegisters |= 1 << current.Register.Index;
+ }
+ else /* if (current.Register.Type == RegisterType.Vector) */
+ {
+ context.VecUsedRegisters |= 1 << current.Register.Index;
+ }
+ }
+
+ continue;
+ }
+
+ AllocateInterval(context, current, index, regMasks.RegistersCount);
+ }
+
+ for (int index = regMasks.RegistersCount * 2; index < _intervals.Count; index++)
+ {
+ if (!_intervals[index].IsSpilled)
+ {
+ ReplaceLocalWithRegister(_intervals[index]);
+ }
+ }
+
+ InsertSplitCopies();
+ InsertSplitCopiesAtEdges(cfg);
+
+ return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize);
+ }
+
+ private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
+ {
+ // Check active intervals that already ended.
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ interval.Forward(current.GetStart());
+
+ if (interval.GetEnd() < current.GetStart())
+ {
+ context.Active.Clear(iIndex);
+ }
+ else if (!interval.Overlaps(current.GetStart()))
+ {
+ context.MoveActiveToInactive(iIndex);
+ }
+ }
+
+ // Check inactive intervals that already ended or were reactivated.
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ interval.Forward(current.GetStart());
+
+ if (interval.GetEnd() < current.GetStart())
+ {
+ context.Inactive.Clear(iIndex);
+ }
+ else if (interval.Overlaps(current.GetStart()))
+ {
+ context.MoveInactiveToActive(iIndex);
+ }
+ }
+
+ if (!TryAllocateRegWithoutSpill(context, current, cIndex, registersCount))
+ {
+ AllocateRegWithSpill(context, current, cIndex, registersCount);
+ }
+ }
+
+ private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
+ {
+ RegisterType regType = current.Local.Type.ToRegisterType();
+
+ Span<int> freePositions = stackalloc int[registersCount];
+
+ context.GetFreePositions(regType, freePositions, out int freePositionsCount);
+
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ if (reg.Type == regType)
+ {
+ freePositions[reg.Index] = 0;
+ freePositionsCount--;
+ }
+ }
+
+ // If all registers are already active, return early. No point in inspecting the inactive set to look for
+ // holes.
+ if (freePositionsCount == 0)
+ {
+ return false;
+ }
+
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ ref int freePosition = ref freePositions[reg.Index];
+
+ if (reg.Type == regType && freePosition != 0)
+ {
+ int overlapPosition = interval.GetOverlapPosition(current);
+
+ if (overlapPosition != LiveInterval.NotFound && freePosition > overlapPosition)
+ {
+ freePosition = overlapPosition;
+ }
+ }
+ }
+
+ // If this is a copy destination variable, we prefer the register used for the copy source.
+ // If the register is available, then the copy can be eliminated later as both source
+ // and destination will use the same register.
+ int selectedReg;
+
+ if (current.TryGetCopySourceRegister(out int preferredReg) && freePositions[preferredReg] >= current.GetEnd())
+ {
+ selectedReg = preferredReg;
+ }
+ else
+ {
+ selectedReg = GetHighestValueIndex(freePositions);
+ }
+
+ int selectedNextUse = freePositions[selectedReg];
+
+ // Intervals start and end at odd positions, unless they span an entire
+ // block, in which case they will have ranges at an even position.
+ // When an interval is loaded from the stack to a register, we can only
+ // do the split at an odd position, because otherwise the split interval
+ // that is inserted on the list to be processed may clobber a register
+ // used by the instruction at the same position as the split.
+ // The problem only happens when an interval ends exactly at this instruction,
+ // because otherwise they would interfere, and the register wouldn't be selected.
+ // When the interval is aligned and the above happens, there's no problem as
+ // the instruction actually associated with the last use is the one
+ // before that position.
+ selectedNextUse &= ~InstructionGapMask;
+
+ if (selectedNextUse <= current.GetStart())
+ {
+ return false;
+ }
+ else if (selectedNextUse < current.GetEnd())
+ {
+ LiveInterval splitChild = current.Split(selectedNextUse);
+
+ if (splitChild.UsesCount != 0)
+ {
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild, registersCount);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+ }
+
+ current.Register = new Register(selectedReg, regType);
+
+ if (regType == RegisterType.Integer)
+ {
+ context.IntUsedRegisters |= 1 << selectedReg;
+ }
+ else /* if (regType == RegisterType.Vector) */
+ {
+ context.VecUsedRegisters |= 1 << selectedReg;
+ }
+
+ context.Active.Set(cIndex);
+
+ return true;
+ }
+
+ private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
+ {
+ RegisterType regType = current.Local.Type.ToRegisterType();
+
+ Span<int> usePositions = stackalloc int[registersCount];
+ Span<int> blockedPositions = stackalloc int[registersCount];
+
+ context.GetFreePositions(regType, usePositions, out _);
+ context.GetFreePositions(regType, blockedPositions, out _);
+
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ if (reg.Type == regType)
+ {
+ ref int usePosition = ref usePositions[reg.Index];
+ ref int blockedPosition = ref blockedPositions[reg.Index];
+
+ if (interval.IsFixed)
+ {
+ usePosition = 0;
+ blockedPosition = 0;
+ }
+ else
+ {
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ if (nextUse != LiveInterval.NotFound && usePosition > nextUse)
+ {
+ usePosition = nextUse;
+ }
+ }
+ }
+ }
+
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ if (reg.Type == regType)
+ {
+ ref int usePosition = ref usePositions[reg.Index];
+ ref int blockedPosition = ref blockedPositions[reg.Index];
+
+ if (interval.IsFixed)
+ {
+ int overlapPosition = interval.GetOverlapPosition(current);
+
+ if (overlapPosition != LiveInterval.NotFound)
+ {
+ blockedPosition = Math.Min(blockedPosition, overlapPosition);
+ usePosition = Math.Min(usePosition, overlapPosition);
+ }
+ }
+ else if (interval.Overlaps(current))
+ {
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ if (nextUse != LiveInterval.NotFound && usePosition > nextUse)
+ {
+ usePosition = nextUse;
+ }
+ }
+ }
+ }
+
+ int selectedReg = GetHighestValueIndex(usePositions);
+ int currentFirstUse = current.FirstUse();
+
+ Debug.Assert(currentFirstUse >= 0, "Current interval has no uses.");
+
+ if (usePositions[selectedReg] < currentFirstUse)
+ {
+ // All intervals in the active and inactive sets are used before current,
+ // so spill the current interval.
+ Debug.Assert(currentFirstUse > current.GetStart(), "Trying to spill an interval currently being used.");
+
+ LiveInterval splitChild = current.Split(currentFirstUse);
+
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild, registersCount);
+
+ Spill(context, current);
+ }
+ else if (blockedPositions[selectedReg] > current.GetEnd())
+ {
+ // Spill made the register available for the entire current lifetime,
+ // so we only need to split the intervals using the selected register.
+ current.Register = new Register(selectedReg, regType);
+
+ SplitAndSpillOverlappingIntervals(context, current, registersCount);
+
+ context.Active.Set(cIndex);
+ }
+ else
+ {
+ // There are conflicts even after spill due to the use of fixed registers
+ // that can't be spilled, so we need to also split current at the point of
+ // the first fixed register use.
+ current.Register = new Register(selectedReg, regType);
+
+ int splitPosition = blockedPositions[selectedReg] & ~InstructionGapMask;
+
+ Debug.Assert(splitPosition > current.GetStart(), "Trying to split an interval at an invalid position.");
+
+ LiveInterval splitChild = current.Split(splitPosition);
+
+ if (splitChild.UsesCount != 0)
+ {
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild, registersCount);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+
+ SplitAndSpillOverlappingIntervals(context, current, registersCount);
+
+ context.Active.Set(cIndex);
+ }
+ }
+
+ private static int GetHighestValueIndex(ReadOnlySpan<int> span)
+ {
+ int highest = int.MinValue;
+
+ int selected = 0;
+
+ for (int index = 0; index < span.Length; index++)
+ {
+ int current = span[index];
+
+ if (highest < current)
+ {
+ highest = current;
+ selected = index;
+
+ if (current == int.MaxValue)
+ {
+ break;
+ }
+ }
+ }
+
+ return selected;
+ }
+
+ private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current, int registersCount)
+ {
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (!interval.IsFixed && interval.Register == current.Register)
+ {
+ SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
+
+ context.Active.Clear(iIndex);
+ }
+ }
+
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current))
+ {
+ SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
+
+ context.Inactive.Clear(iIndex);
+ }
+ }
+ }
+
+ private void SplitAndSpillOverlappingInterval(
+ AllocationContext context,
+ LiveInterval current,
+ LiveInterval interval,
+ int registersCount)
+ {
+ // If there's a next use after the start of the current interval,
+ // we need to split the spilled interval twice, and re-insert it
+ // on the "pending" list to ensure that it will get a new register
+ // on that use position.
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ LiveInterval splitChild;
+
+ if (interval.GetStart() < current.GetStart())
+ {
+ splitChild = interval.Split(current.GetStart());
+ }
+ else
+ {
+ splitChild = interval;
+ }
+
+ if (nextUse != -1)
+ {
+ Debug.Assert(nextUse > current.GetStart(), "Trying to spill an interval currently being used.");
+
+ if (nextUse > splitChild.GetStart())
+ {
+ LiveInterval right = splitChild.Split(nextUse);
+
+ Spill(context, splitChild);
+
+ splitChild = right;
+ }
+
+ InsertInterval(splitChild, registersCount);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+ }
+
+ private void InsertInterval(LiveInterval interval, int registersCount)
+ {
+ Debug.Assert(interval.UsesCount != 0, "Trying to insert an interval without uses.");
+ Debug.Assert(!interval.IsEmpty, "Trying to insert an empty interval.");
+ Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval.");
+
+ int startIndex = registersCount * 2;
+
+ int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null);
+
+ if (insertIndex < 0)
+ {
+ insertIndex = ~insertIndex;
+ }
+
+ _intervals.Insert(insertIndex, interval);
+ }
+
+ private static void Spill(AllocationContext context, LiveInterval interval)
+ {
+ Debug.Assert(!interval.IsFixed, "Trying to spill a fixed interval.");
+ Debug.Assert(interval.UsesCount == 0, "Trying to spill a interval with uses.");
+
+ // We first check if any of the siblings were spilled; if so, we can reuse
+ // the stack offset. Otherwise, we allocate new space on the stack.
+ // This prevents stack-to-stack copies from being necessary for a split interval.
+ if (!interval.TrySpillWithSiblingOffset())
+ {
+ interval.Spill(context.StackAlloc.Allocate(interval.Local.Type));
+ }
+ }
+
+ private void InsertSplitCopies()
+ {
+ Dictionary<int, CopyResolver> copyResolvers = new();
+
+ CopyResolver GetCopyResolver(int position)
+ {
+ if (!copyResolvers.TryGetValue(position, out CopyResolver copyResolver))
+ {
+ copyResolver = new CopyResolver();
+
+ copyResolvers.Add(position, copyResolver);
+ }
+
+ return copyResolver;
+ }
+
+ foreach (LiveInterval interval in _intervals.Where(x => x.IsSplit))
+ {
+ LiveInterval previous = interval;
+
+ foreach (LiveInterval splitChild in interval.SplitChildren())
+ {
+ int splitPosition = splitChild.GetStart();
+
+ if (!_blockEdges.Contains(splitPosition) && previous.GetEnd() == splitPosition)
+ {
+ GetCopyResolver(splitPosition).AddSplit(previous, splitChild);
+ }
+
+ previous = splitChild;
+ }
+ }
+
+ foreach (KeyValuePair<int, CopyResolver> kv in copyResolvers)
+ {
+ CopyResolver copyResolver = kv.Value;
+
+ if (!copyResolver.HasCopy)
+ {
+ continue;
+ }
+
+ int splitPosition = kv.Key;
+
+ (IntrusiveList<Operation> nodes, Operation node) = GetOperationNode(splitPosition);
+
+ Operation[] sequence = copyResolver.Sequence();
+
+ nodes.AddBefore(node, sequence[0]);
+
+ node = sequence[0];
+
+ for (int index = 1; index < sequence.Length; index++)
+ {
+ nodes.AddAfter(node, sequence[index]);
+
+ node = sequence[index];
+ }
+ }
+ }
+
+ private void InsertSplitCopiesAtEdges(ControlFlowGraph cfg)
+ {
+ int blocksCount = cfg.Blocks.Count;
+
+ bool IsSplitEdgeBlock(BasicBlock block)
+ {
+ return block.Index >= blocksCount;
+ }
+
+ // Reset iterators to beginning because GetSplitChild depends on the state of the iterator.
+ foreach (LiveInterval interval in _intervals)
+ {
+ interval.Reset();
+ }
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ if (IsSplitEdgeBlock(block))
+ {
+ continue;
+ }
+
+ bool hasSingleOrNoSuccessor = block.SuccessorsCount <= 1;
+
+ for (int i = 0; i < block.SuccessorsCount; i++)
+ {
+ BasicBlock successor = block.GetSuccessor(i);
+
+ int succIndex = successor.Index;
+
+ // If the successor is a split-edge block, the actual successor
+ // (the one it had before the split) is its single successor.
+ if (IsSplitEdgeBlock(successor))
+ {
+ succIndex = successor.GetSuccessor(0).Index;
+ }
+
+ CopyResolver copyResolver = null;
+
+ foreach (int iIndex in _blockLiveIn[succIndex])
+ {
+ LiveInterval interval = _parentIntervals[iIndex];
+
+ if (!interval.IsSplit)
+ {
+ continue;
+ }
+
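+ // Sample one position inside each block: the last position of the
+ // predecessor and the first position of the successor.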
+ int lEnd = _blockRanges[block.Index].End - 1;
+ int rStart = _blockRanges[succIndex].Start;
+
+ LiveInterval left = interval.GetSplitChild(lEnd);
+ LiveInterval right = interval.GetSplitChild(rStart);
+
+ if (left != default && right != default && left != right)
+ {
+ copyResolver ??= new CopyResolver();
+
+ copyResolver.AddSplit(left, right);
+ }
+ }
+
+ if (copyResolver == null || !copyResolver.HasCopy)
+ {
+ continue;
+ }
+
+ Operation[] sequence = copyResolver.Sequence();
+
+ if (hasSingleOrNoSuccessor)
+ {
+ foreach (Operation operation in sequence)
+ {
+ block.Append(operation);
+ }
+ }
+ else if (successor.Predecessors.Count == 1)
+ {
+ successor.Operations.AddFirst(sequence[0]);
+
+ Operation prependNode = sequence[0];
+
+ for (int index = 1; index < sequence.Length; index++)
+ {
+ Operation operation = sequence[index];
+
+ successor.Operations.AddAfter(prependNode, operation);
+
+ prependNode = operation;
+ }
+ }
+ else
+ {
+ // Split the critical edge.
+ BasicBlock splitBlock = cfg.SplitEdge(block, successor);
+
+ foreach (Operation operation in sequence)
+ {
+ splitBlock.Append(operation);
+ }
+ }
+ }
+ }
+ }
+
+ private void ReplaceLocalWithRegister(LiveInterval current)
+ {
+ Operand register = GetRegister(current);
+
+ foreach (int usePosition in current.UsePositions())
+ {
+ (_, Operation operation) = GetOperationNode(usePosition);
+
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ Operand source = operation.GetSource(index);
+
+ if (source == current.Local)
+ {
+ operation.SetSource(index, register);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress == current.Local)
+ {
+ memOp.BaseAddress = register;
+ }
+
+ if (memOp.Index == current.Local)
+ {
+ memOp.Index = register;
+ }
+ }
+ }
+
+ for (int index = 0; index < operation.DestinationsCount; index++)
+ {
+ Operand dest = operation.GetDestination(index);
+
+ if (dest == current.Local)
+ {
+ operation.SetDestination(index, register);
+ }
+ }
+ }
+ }
+
+ private static Operand GetRegister(LiveInterval interval)
+ {
+ Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed.");
+
+ return Operand.Factory.Register(
+ interval.Register.Index,
+ interval.Register.Type,
+ interval.Local.Type);
+ }
+
+ private (IntrusiveList<Operation>, Operation) GetOperationNode(int position)
+ {
+ return _operationNodes[position / InstructionGap];
+ }
+
+ private void NumberLocals(ControlFlowGraph cfg, int registersCount)
+ {
+ _operationNodes = new List<(IntrusiveList<Operation>, Operation)>();
+ _intervals = new List<LiveInterval>();
+
+ for (int index = 0; index < registersCount; index++)
+ {
+ _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer)));
+ _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector)));
+ }
+
+ // The "visited" state is stored in the MSB of the local's value.
+ const ulong VisitedMask = 1ul << 63;
+
+ static bool IsVisited(Operand local)
+ {
+ return (local.GetValueUnsafe() & VisitedMask) != 0;
+ }
+
+ static void SetVisited(Operand local)
+ {
+ local.GetValueUnsafe() |= VisitedMask;
+ }
+
+ _operationsCount = 0;
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ _operationNodes.Add((block.Operations, node));
+
+ for (int i = 0; i < node.DestinationsCount; i++)
+ {
+ Operand dest = node.GetDestination(i);
+
+ if (dest.Kind == OperandKind.LocalVariable && !IsVisited(dest))
+ {
+ dest.NumberLocal(_intervals.Count);
+
+ LiveInterval interval = new LiveInterval(dest);
+ _intervals.Add(interval);
+
+ SetVisited(dest);
+
+ // If this is a copy (or copy-like operation), set the copy source interval as well.
+ // This is used for register preferencing later on, which allows the copy to be eliminated
+ // in some cases.
+ if (node.Instruction == Instruction.Copy || node.Instruction == Instruction.ZeroExtend32)
+ {
+ Operand source = node.GetSource(0);
+
+ if (source.Kind == OperandKind.LocalVariable &&
+ source.GetLocalNumber() > 0 &&
+ (node.Instruction == Instruction.Copy || source.Type == OperandType.I32))
+ {
+ interval.SetCopySource(_intervals[source.GetLocalNumber()]);
+ }
+ }
+ }
+ }
+ }
+
+ _operationsCount += block.Operations.Count * InstructionGap;
+
+ if (block.Operations.Count == 0)
+ {
+ // Pretend we have a dummy instruction on the empty block.
+ _operationNodes.Add((default, default));
+
+ _operationsCount += InstructionGap;
+ }
+ }
+
+ _parentIntervals = _intervals.ToArray();
+ }
+
+ private void BuildIntervals(ControlFlowGraph cfg, AllocationContext context)
+ {
+ _blockRanges = new LiveRange[cfg.Blocks.Count];
+
+ int mapSize = _intervals.Count;
+
+ BitMap[] blkLiveGen = new BitMap[cfg.Blocks.Count];
+ BitMap[] blkLiveKill = new BitMap[cfg.Blocks.Count];
+
+ // Compute local live sets.
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ BitMap liveGen = new(Allocators.Default, mapSize);
+ BitMap liveKill = new(Allocators.Default, mapSize);
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ for (int i = 0; i < node.SourcesCount; i++)
+ {
+ VisitSource(node.GetSource(i));
+ }
+
+ for (int i = 0; i < node.DestinationsCount; i++)
+ {
+ VisitDestination(node.GetDestination(i));
+ }
+
+ void VisitSource(Operand source)
+ {
+ if (IsLocalOrRegister(source.Kind))
+ {
+ int id = GetOperandId(source);
+
+ if (!liveKill.IsSet(id))
+ {
+ liveGen.Set(id);
+ }
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ VisitSource(memOp.BaseAddress);
+ }
+
+ if (memOp.Index != default)
+ {
+ VisitSource(memOp.Index);
+ }
+ }
+ }
+
+ void VisitDestination(Operand dest)
+ {
+ liveKill.Set(GetOperandId(dest));
+ }
+ }
+
+ blkLiveGen[block.Index] = liveGen;
+ blkLiveKill[block.Index] = liveKill;
+ }
+
+ // Compute global live sets.
+ BitMap[] blkLiveIn = new BitMap[cfg.Blocks.Count];
+ BitMap[] blkLiveOut = new BitMap[cfg.Blocks.Count];
+
+ for (int index = 0; index < cfg.Blocks.Count; index++)
+ {
+ blkLiveIn[index] = new BitMap(Allocators.Default, mapSize);
+ blkLiveOut[index] = new BitMap(Allocators.Default, mapSize);
+ }
+
+ bool modified;
+
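+ // Iterate the backward dataflow equations to a fixed point:
+ // liveOut(b) = union of liveIn(s) over all successors s of b,
+ // liveIn(b) = liveGen(b) | (liveOut(b) & ~liveKill(b)).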
+ do
+ {
+ modified = false;
+
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ BitMap liveOut = blkLiveOut[block.Index];
+
+ for (int i = 0; i < block.SuccessorsCount; i++)
+ {
+ BasicBlock succ = block.GetSuccessor(i);
+
+ modified |= liveOut.Set(blkLiveIn[succ.Index]);
+ }
+
+ BitMap liveIn = blkLiveIn[block.Index];
+
+ liveIn.Set(liveOut);
+ liveIn.Clear(blkLiveKill[block.Index]);
+ liveIn.Set(blkLiveGen[block.Index]);
+ }
+ }
+ while (modified);
+
+ _blockLiveIn = blkLiveIn;
+
+ _blockEdges = new HashSet<int>();
+
+ // Compute lifetime intervals.
+ int operationPos = _operationsCount;
+
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ // We handle empty blocks by pretending they have a dummy instruction,
+ // because otherwise the block would have the same start and end position,
+ // and this is not valid.
+ int instCount = Math.Max(block.Operations.Count, 1);
+
+ int blockStart = operationPos - instCount * InstructionGap;
+ int blockEnd = operationPos;
+
+ _blockRanges[block.Index] = new LiveRange(blockStart, blockEnd);
+
+ _blockEdges.Add(blockStart);
+
+ BitMap liveOut = blkLiveOut[block.Index];
+
+ foreach (int id in liveOut)
+ {
+ _intervals[id].AddRange(blockStart, blockEnd);
+ }
+
+ if (block.Operations.Count == 0)
+ {
+ operationPos -= InstructionGap;
+
+ continue;
+ }
+
+ for (Operation node = block.Operations.Last; node != default; node = node.ListPrevious)
+ {
+ operationPos -= InstructionGap;
+
+ for (int i = 0; i < node.DestinationsCount; i++)
+ {
+ VisitDestination(node.GetDestination(i));
+ }
+
+ for (int i = 0; i < node.SourcesCount; i++)
+ {
+ VisitSource(node.GetSource(i));
+ }
+
+ if (node.Instruction == Instruction.Call)
+ {
+ AddIntervalCallerSavedReg(context.Masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer);
+ AddIntervalCallerSavedReg(context.Masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector);
+ }
+
+ void VisitSource(Operand source)
+ {
+ if (IsLocalOrRegister(source.Kind))
+ {
+ LiveInterval interval = _intervals[GetOperandId(source)];
+
+ interval.AddRange(blockStart, operationPos + 1);
+ interval.AddUsePosition(operationPos);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ VisitSource(memOp.BaseAddress);
+ }
+
+ if (memOp.Index != default)
+ {
+ VisitSource(memOp.Index);
+ }
+ }
+ }
+
+ void VisitDestination(Operand dest)
+ {
+ LiveInterval interval = _intervals[GetOperandId(dest)];
+
+ if (interval.IsFixed)
+ {
+ interval.IsFixedAndUsed = true;
+ }
+
+ interval.SetStart(operationPos + 1);
+ interval.AddUsePosition(operationPos + 1);
+ }
+ }
+ }
+
+ foreach (LiveInterval interval in _parentIntervals)
+ {
+ interval.Reset();
+ }
+ }
+
+ private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType)
+ {
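+ // Walk the mask one set bit at a time, adding a short fixed range at the
+ // call site for each caller-saved register.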
+ while (mask != 0)
+ {
+ int regIndex = BitOperations.TrailingZeroCount(mask);
+
+ Register callerSavedReg = new(regIndex, regType);
+
+ LiveInterval interval = _intervals[GetRegisterId(callerSavedReg)];
+
+ interval.AddRange(operationPos + 1, operationPos + InstructionGap);
+
+ mask &= ~(1 << regIndex);
+ }
+ }
+
+ private static int GetOperandId(Operand operand)
+ {
+ if (operand.Kind == OperandKind.LocalVariable)
+ {
+ return operand.GetLocalNumber();
+ }
+ else if (operand.Kind == OperandKind.Register)
+ {
+ return GetRegisterId(operand.GetRegister());
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid operand kind \"{operand.Kind}\".");
+ }
+ }
+
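+ // Fixed intervals are laid out as [int0, vec0, int1, vec1, ...], so the register
+ // id packs the type into the lowest bit: e.g. integer r5 -> 10, vector v5 -> 11.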
+ private static int GetRegisterId(Register register)
+ {
+ return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0);
+ }
+
+ private static bool IsLocalOrRegister(OperandKind kind)
+ {
+ return kind == OperandKind.LocalVariable ||
+ kind == OperandKind.Register;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
new file mode 100644
index 0000000..cfe1bc7
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
@@ -0,0 +1,419 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe readonly struct LiveInterval : IComparable<LiveInterval>
+ {
+ public const int NotFound = -1;
+
+ private struct Data
+ {
+ public int End;
+ public int SpillOffset;
+
+ public LiveRange FirstRange;
+ public LiveRange PrevRange;
+ public LiveRange CurrRange;
+
+ public LiveInterval Parent;
+ public LiveInterval CopySource;
+
+ public UseList Uses;
+ public LiveIntervalList Children;
+
+ public Operand Local;
+ public Register Register;
+
+ public bool IsFixed;
+ public bool IsFixedAndUsed;
+ }
+
+ private readonly Data* _data;
+
+ private ref int End => ref _data->End;
+ private ref LiveRange FirstRange => ref _data->FirstRange;
+ private ref LiveRange CurrRange => ref _data->CurrRange;
+ private ref LiveRange PrevRange => ref _data->PrevRange;
+ private ref LiveInterval Parent => ref _data->Parent;
+ private ref LiveInterval CopySource => ref _data->CopySource;
+ private ref UseList Uses => ref _data->Uses;
+ private ref LiveIntervalList Children => ref _data->Children;
+
+ public Operand Local => _data->Local;
+ public ref Register Register => ref _data->Register;
+ public ref int SpillOffset => ref _data->SpillOffset;
+
+ public bool IsFixed => _data->IsFixed;
+ public ref bool IsFixedAndUsed => ref _data->IsFixedAndUsed;
+ public bool IsEmpty => FirstRange == default;
+ public bool IsSplit => Children.Count != 0;
+ public bool IsSpilled => SpillOffset != -1;
+
+ public int UsesCount => Uses.Count;
+
+ public LiveInterval(Operand local = default, LiveInterval parent = default)
+ {
+ _data = Allocators.LiveIntervals.Allocate<Data>();
+ *_data = default;
+
+ _data->IsFixed = false;
+ _data->Local = local;
+
+ Parent = parent == default ? this : parent;
+ Uses = new UseList();
+ Children = new LiveIntervalList();
+
+ FirstRange = default;
+ CurrRange = default;
+ PrevRange = default;
+
+ SpillOffset = -1;
+ }
+
+ public LiveInterval(Register register) : this(local: default, parent: default)
+ {
+ _data->IsFixed = true;
+
+ Register = register;
+ }
+
+ public void SetCopySource(LiveInterval copySource)
+ {
+ CopySource = copySource;
+ }
+
+ public bool TryGetCopySourceRegister(out int copySourceRegIndex)
+ {
+ if (CopySource._data != null)
+ {
+ copySourceRegIndex = CopySource.Register.Index;
+
+ return true;
+ }
+
+ copySourceRegIndex = 0;
+
+ return false;
+ }
+
+ public void Reset()
+ {
+ PrevRange = default;
+ CurrRange = FirstRange;
+ }
+
+ public void Forward(int position)
+ {
+ LiveRange prev = PrevRange;
+ LiveRange curr = CurrRange;
+
+ while (curr != default && curr.Start < position && !curr.Overlaps(position))
+ {
+ prev = curr;
+ curr = curr.Next;
+ }
+
+ PrevRange = prev;
+ CurrRange = curr;
+ }
+
+ public int GetStart()
+ {
+ Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have a start position.");
+
+ return FirstRange.Start;
+ }
+
+ public void SetStart(int position)
+ {
+ if (FirstRange != default)
+ {
+ Debug.Assert(position != FirstRange.End);
+
+ FirstRange.Start = position;
+ }
+ else
+ {
+ FirstRange = new LiveRange(position, position + 1);
+ End = position + 1;
+ }
+ }
+
+ public int GetEnd()
+ {
+ Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have an end position.");
+
+ return End;
+ }
+
+ public void AddRange(int start, int end)
+ {
+ Debug.Assert(start < end, $"Invalid range start position {start}, {end}");
+
+ if (FirstRange != default)
+ {
+ // If the new range ends exactly where the first range starts, coalesce them.
+ if (end == FirstRange.Start)
+ {
+ FirstRange.Start = start;
+
+ return;
+ }
+ // If the new range overlaps the first range, merge them.
+ else if (FirstRange.Overlaps(start, end))
+ {
+ FirstRange.Start = Math.Min(FirstRange.Start, start);
+ FirstRange.End = Math.Max(FirstRange.End, end);
+ End = Math.Max(End, end);
+
+ Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next));
+ return;
+ }
+ }
+
+ FirstRange = new LiveRange(start, end, FirstRange);
+ End = Math.Max(End, end);
+
+ Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next));
+ }
+
+ public void AddUsePosition(int position)
+ {
+ Uses.Add(position);
+ }
+
+ public bool Overlaps(int position)
+ {
+ LiveRange curr = CurrRange;
+
+ while (curr != default && curr.Start <= position)
+ {
+ if (curr.Overlaps(position))
+ {
+ return true;
+ }
+
+ curr = curr.Next;
+ }
+
+ return false;
+ }
+
+ public bool Overlaps(LiveInterval other)
+ {
+ return GetOverlapPosition(other) != NotFound;
+ }
+
+ public int GetOverlapPosition(LiveInterval other)
+ {
+ LiveRange a = CurrRange;
+ LiveRange b = other.CurrRange;
+
+ while (a != default)
+ {
+ while (b != default && b.Start < a.Start)
+ {
+ if (a.Overlaps(b))
+ {
+ return a.Start;
+ }
+
+ b = b.Next;
+ }
+
+ if (b == default)
+ {
+ break;
+ }
+ else if (a.Overlaps(b))
+ {
+ return a.Start;
+ }
+
+ a = a.Next;
+ }
+
+ return NotFound;
+ }
+
+ public ReadOnlySpan<LiveInterval> SplitChildren()
+ {
+ return Parent.Children.Span;
+ }
+
+ public ReadOnlySpan<int> UsePositions()
+ {
+ return Uses.Span;
+ }
+
+ public int FirstUse()
+ {
+ return Uses.FirstUse;
+ }
+
+ public int NextUseAfter(int position)
+ {
+ return Uses.NextUse(position);
+ }
+
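+ // Splits the interval at the given position: ranges and use positions from
+ // that point onwards move into a new child interval, which is registered on
+ // the parent's split-children list.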
+ public LiveInterval Split(int position)
+ {
+ LiveInterval result = new(Local, Parent)
+ {
+ End = End,
+ };
+
+ LiveRange prev = PrevRange;
+ LiveRange curr = CurrRange;
+
+ while (curr != default && curr.Start < position && !curr.Overlaps(position))
+ {
+ prev = curr;
+ curr = curr.Next;
+ }
+
+ if (curr.Start >= position)
+ {
+ prev.Next = default;
+
+ result.FirstRange = curr;
+
+ End = prev.End;
+ }
+ else
+ {
+ result.FirstRange = new LiveRange(position, curr.End, curr.Next);
+
+ curr.End = position;
+ curr.Next = default;
+
+ End = curr.End;
+ }
+
+ result.Uses = Uses.Split(position);
+
+ AddSplitChild(result);
+
+ Debug.Assert(!IsEmpty, "Left interval is empty after split.");
+ Debug.Assert(!result.IsEmpty, "Right interval is empty after split.");
+
+ // Make sure the iterator in the new split is pointing to the start.
+ result.Reset();
+
+ return result;
+ }
+
+ private void AddSplitChild(LiveInterval child)
+ {
+ Debug.Assert(!child.IsEmpty, "Trying to insert an empty interval.");
+
+ Parent.Children.Add(child);
+ }
+
+ public LiveInterval GetSplitChild(int position)
+ {
+ if (Overlaps(position))
+ {
+ return this;
+ }
+
+ foreach (LiveInterval splitChild in SplitChildren())
+ {
+ if (splitChild.Overlaps(position))
+ {
+ return splitChild;
+ }
+ else if (splitChild.GetStart() > position)
+ {
+ break;
+ }
+ }
+
+ return default;
+ }
+
+ public bool TrySpillWithSiblingOffset()
+ {
+ foreach (LiveInterval splitChild in SplitChildren())
+ {
+ if (splitChild.IsSpilled)
+ {
+ Spill(splitChild.SpillOffset);
+
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ public void Spill(int offset)
+ {
+ SpillOffset = offset;
+ }
+
+ public int CompareTo(LiveInterval interval)
+ {
+ if (FirstRange == default || interval.FirstRange == default)
+ {
+ return 0;
+ }
+
+ return GetStart().CompareTo(interval.GetStart());
+ }
+
+ public bool Equals(LiveInterval interval)
+ {
+ return interval._data == _data;
+ }
+
+ public override bool Equals(object obj)
+ {
+ return obj is LiveInterval interval && Equals(interval);
+ }
+
+ public static bool operator ==(LiveInterval a, LiveInterval b)
+ {
+ return a.Equals(b);
+ }
+
+ public static bool operator !=(LiveInterval a, LiveInterval b)
+ {
+ return !a.Equals(b);
+ }
+
+ public override int GetHashCode()
+ {
+ return HashCode.Combine((IntPtr)_data);
+ }
+
+ public override string ToString()
+ {
+ LiveInterval self = this;
+
+ IEnumerable<string> GetRanges()
+ {
+ LiveRange curr = self.CurrRange;
+
+ while (curr != default)
+ {
+ if (curr == self.CurrRange)
+ {
+ yield return "*" + curr;
+ }
+ else
+ {
+ yield return curr.ToString();
+ }
+
+ curr = curr.Next;
+ }
+ }
+
+ return string.Join(", ", GetRanges());
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs
new file mode 100644
index 0000000..84b892f
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs
@@ -0,0 +1,40 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe struct LiveIntervalList
+ {
+ private LiveInterval* _items;
+ private int _count;
+ private int _capacity;
+
+ public readonly int Count => _count;
+ public readonly Span<LiveInterval> Span => new(_items, _count);
+
+ public void Add(LiveInterval interval)
+ {
+ if (_count + 1 > _capacity)
+ {
+ var oldSpan = Span;
+
+ _capacity = Math.Max(4, _capacity * 2);
+ _items = Allocators.References.Allocate<LiveInterval>((uint)_capacity);
+
+ var newSpan = Span;
+
+ oldSpan.CopyTo(newSpan);
+ }
+
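+ // Keep the children ordered by ascending start position (insertion sort from the tail).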
+ int position = interval.GetStart();
+ int i = _count - 1;
+
+ while (i >= 0 && _items[i].GetStart() > position)
+ {
+ _items[i + 1] = _items[i--];
+ }
+
+ _items[i + 1] = interval;
+ _count++;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
new file mode 100644
index 0000000..412d597
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
@@ -0,0 +1,74 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe readonly struct LiveRange : IEquatable<LiveRange>
+ {
+ private struct Data
+ {
+ public int Start;
+ public int End;
+ public LiveRange Next;
+ }
+
+ private readonly Data* _data;
+
+ public ref int Start => ref _data->Start;
+ public ref int End => ref _data->End;
+ public ref LiveRange Next => ref _data->Next;
+
+ public LiveRange(int start, int end, LiveRange next = default)
+ {
+ _data = Allocators.LiveRanges.Allocate<Data>();
+
+ Start = start;
+ End = end;
+ Next = next;
+ }
+
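+ // Ranges are half-open intervals [Start, End); two ranges overlap
+ // if and only if each one starts before the other ends.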
+ public bool Overlaps(int start, int end)
+ {
+ return Start < end && start < End;
+ }
+
+ public bool Overlaps(LiveRange range)
+ {
+ return Start < range.End && range.Start < End;
+ }
+
+ public bool Overlaps(int position)
+ {
+ return position >= Start && position < End;
+ }
+
+ public bool Equals(LiveRange range)
+ {
+ return range._data == _data;
+ }
+
+ public override bool Equals(object obj)
+ {
+ return obj is LiveRange range && Equals(range);
+ }
+
+ public static bool operator ==(LiveRange a, LiveRange b)
+ {
+ return a.Equals(b);
+ }
+
+ public static bool operator !=(LiveRange a, LiveRange b)
+ {
+ return !a.Equals(b);
+ }
+
+ public override int GetHashCode()
+ {
+ return HashCode.Combine((IntPtr)_data);
+ }
+
+ public override string ToString()
+ {
+ return $"[{Start}, {End})";
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
new file mode 100644
index 0000000..e6972cf
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
@@ -0,0 +1,50 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ readonly struct RegisterMasks
+ {
+ public int IntAvailableRegisters { get; }
+ public int VecAvailableRegisters { get; }
+ public int IntCallerSavedRegisters { get; }
+ public int VecCallerSavedRegisters { get; }
+ public int IntCalleeSavedRegisters { get; }
+ public int VecCalleeSavedRegisters { get; }
+ public int RegistersCount { get; }
+
+ public RegisterMasks(
+ int intAvailableRegisters,
+ int vecAvailableRegisters,
+ int intCallerSavedRegisters,
+ int vecCallerSavedRegisters,
+ int intCalleeSavedRegisters,
+ int vecCalleeSavedRegisters,
+ int registersCount)
+ {
+ IntAvailableRegisters = intAvailableRegisters;
+ VecAvailableRegisters = vecAvailableRegisters;
+ IntCallerSavedRegisters = intCallerSavedRegisters;
+ VecCallerSavedRegisters = vecCallerSavedRegisters;
+ IntCalleeSavedRegisters = intCalleeSavedRegisters;
+ VecCalleeSavedRegisters = vecCalleeSavedRegisters;
+ RegistersCount = registersCount;
+ }
+
+ public int GetAvailableRegisters(RegisterType type)
+ {
+ if (type == RegisterType.Integer)
+ {
+ return IntAvailableRegisters;
+ }
+ else if (type == RegisterType.Vector)
+ {
+ return VecAvailableRegisters;
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid register type \"{type}\".");
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
new file mode 100644
index 0000000..13995bc
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
@@ -0,0 +1,25 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class StackAllocator
+ {
+ private int _offset;
+
+ public int TotalSize => _offset;
+
+ public int Allocate(OperandType type)
+ {
+ return Allocate(type.GetSizeInBytes());
+ }
+
+ public int Allocate(int sizeInBytes)
+ {
+ int offset = _offset;
+
+ _offset += sizeInBytes;
+
+ return offset;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs
new file mode 100644
index 0000000..806002f
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs
@@ -0,0 +1,86 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe struct UseList
+ {
+ private int* _items;
+ private int _capacity;
+
+ public int Count { get; private set; }
+
+ public readonly int FirstUse => Count > 0 ? _items[Count - 1] : LiveInterval.NotFound;
+ public readonly Span<int> Span => new(_items, Count);
+
+ public void Add(int position)
+ {
+ if (Count + 1 > _capacity)
+ {
+ var oldSpan = Span;
+
+ _capacity = Math.Max(4, _capacity * 2);
+ _items = Allocators.Default.Allocate<int>((uint)_capacity);
+
+ var newSpan = Span;
+
+ oldSpan.CopyTo(newSpan);
+ }
+
+ // Use positions are usually added in descending order, so keeping the list
+ // sorted in descending order reduces the number of element shifts per insertion.
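+ // e.g. inserting 12 into [30, 20, 10] shifts only the trailing 10: [30, 20, 12, 10].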
+ int i = Count - 1;
+
+ while (i >= 0 && _items[i] < position)
+ {
+ _items[i + 1] = _items[i--];
+ }
+
+ _items[i + 1] = position;
+ Count++;
+ }
+
+ public readonly int NextUse(int position)
+ {
+ int index = NextUseIndex(position);
+
+ return index != LiveInterval.NotFound ? _items[index] : LiveInterval.NotFound;
+ }
+
+ public readonly int NextUseIndex(int position)
+ {
+ int i = Count - 1;
+
+ if (i == -1 || position > _items[0])
+ {
+ return LiveInterval.NotFound;
+ }
+
+ while (i >= 0 && _items[i] < position)
+ {
+ i--;
+ }
+
+ return i;
+ }
+
+ public UseList Split(int position)
+ {
+ int index = NextUseIndex(position);
+
+ // Since the list is in descending order, the new split list takes the front of the list and the current
+ // list takes the back of the list.
+ UseList result = new()
+ {
+ Count = index + 1,
+ };
+ result._capacity = result.Count;
+ result._items = _items;
+
+ Count -= result.Count;
+ _capacity = Count;
+ _items += result.Count;
+
+ return result;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
new file mode 100644
index 0000000..127b842
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ struct UnwindInfo
+ {
+ public const int Stride = 4; // Bytes.
+
+ public UnwindPushEntry[] PushEntries { get; }
+ public int PrologSize { get; }
+
+ public UnwindInfo(UnwindPushEntry[] pushEntries, int prologSize)
+ {
+ PushEntries = pushEntries;
+ PrologSize = prologSize;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs
new file mode 100644
index 0000000..2045019
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ enum UnwindPseudoOp
+ {
+ PushReg = 0,
+ SetFrame = 1,
+ AllocStack = 2,
+ SaveReg = 3,
+ SaveXmm128 = 4,
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
new file mode 100644
index 0000000..507ace5
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ struct UnwindPushEntry
+ {
+ public const int Stride = 16; // Bytes.
+
+ public UnwindPseudoOp PseudoOp { get; }
+ public int PrologOffset { get; }
+ public int RegIndex { get; }
+ public int StackOffsetOrAllocSize { get; }
+
+ public UnwindPushEntry(UnwindPseudoOp pseudoOp, int prologOffset, int regIndex = -1, int stackOffsetOrAllocSize = -1)
+ {
+ PseudoOp = pseudoOp;
+ PrologOffset = prologOffset;
+ RegIndex = regIndex;
+ StackOffsetOrAllocSize = stackOffsetOrAllocSize;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/Assembler.cs b/src/ARMeilleure/CodeGen/X86/Assembler.cs
new file mode 100644
index 0000000..96f4de0
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -0,0 +1,1580 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.IntermediateRepresentation;
+using Ryujinx.Common.Memory;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ partial class Assembler
+ {
+ private const int ReservedBytesForJump = 1;
+
+ private const int OpModRMBits = 24;
+
+ private const byte RexPrefix = 0x40;
+ private const byte RexWPrefix = 0x48;
+ private const byte LockPrefix = 0xf0;
+
+ private const int MaxRegNumber = 15;
+
+ private struct Jump
+ {
+ public bool IsConditional { get; }
+ public X86Condition Condition { get; }
+ public Operand JumpLabel { get; }
+ public long? JumpTarget { get; set; }
+ public long JumpPosition { get; }
+ public long Offset { get; set; }
+ public int InstSize { get; set; }
+
+ public Jump(Operand jumpLabel, long jumpPosition)
+ {
+ IsConditional = false;
+ Condition = 0;
+ JumpLabel = jumpLabel;
+ JumpTarget = null;
+ JumpPosition = jumpPosition;
+
+ Offset = 0;
+ InstSize = 0;
+ }
+
+ public Jump(X86Condition condition, Operand jumpLabel, long jumpPosition)
+ {
+ IsConditional = true;
+ Condition = condition;
+ JumpLabel = jumpLabel;
+ JumpTarget = null;
+ JumpPosition = jumpPosition;
+
+ Offset = 0;
+ InstSize = 0;
+ }
+ }
+
+ private struct Reloc
+ {
+ public int JumpIndex { get; set; }
+ public int Position { get; set; }
+ public Symbol Symbol { get; set; }
+ }
+
+ private readonly List<Jump> _jumps;
+ private readonly List<Reloc> _relocs;
+ private readonly Dictionary<Operand, long> _labels;
+ private readonly Stream _stream;
+
+ public bool HasRelocs => _relocs != null;
+
+ public Assembler(Stream stream, bool relocatable)
+ {
+ _stream = stream;
+ _labels = new Dictionary<Operand, long>();
+ _jumps = new List<Jump>();
+
+ _relocs = relocatable ? new List<Reloc>() : null;
+ }
+
+ public void MarkLabel(Operand label)
+ {
+ _labels.Add(label, _stream.Position);
+ }
+
+ public void Add(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Add);
+ }
+
+ public void Addsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Addsd);
+ }
+
+ public void Addss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Addss);
+ }
+
+ public void And(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.And);
+ }
+
+ public void Bsr(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Bsr);
+ }
+
+ public void Bswap(Operand dest)
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Bswap);
+ }
+
+ public void Call(Operand dest)
+ {
+ WriteInstruction(dest, default, OperandType.None, X86Instruction.Call);
+ }
+
+ public void Cdq()
+ {
+ WriteByte(0x99);
+ }
+
+ public void Cmovcc(Operand dest, Operand source, OperandType type, X86Condition condition)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Cmovcc];
+
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM | (int)condition, rrm: true);
+ }
+
+ public void Cmp(Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(src1, src2, type, X86Instruction.Cmp);
+ }
+
+ public void Cqo()
+ {
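+ // REX.W (0x48) promotes the CDQ opcode (0x99) to CQO, sign-extending RAX into RDX.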
+ WriteByte(0x48);
+ WriteByte(0x99);
+ }
+
+ public void Cmpxchg(Operand memOp, Operand src)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+
+ WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg);
+ }
+
+ public void Cmpxchg16(Operand memOp, Operand src)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+ WriteByte(0x66);
+
+ WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg);
+ }
+
+ public void Cmpxchg16b(Operand memOp)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+
+ WriteInstruction(memOp, default, OperandType.None, X86Instruction.Cmpxchg16b);
+ }
+
+ public void Cmpxchg8(Operand memOp, Operand src)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+
+ WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg8);
+ }
+
+ public void Comisd(Operand src1, Operand src2)
+ {
+ WriteInstruction(src1, default, src2, X86Instruction.Comisd);
+ }
+
+ public void Comiss(Operand src1, Operand src2)
+ {
+ WriteInstruction(src1, default, src2, X86Instruction.Comiss);
+ }
+
+ public void Cvtsd2ss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss);
+ }
+
+ public void Cvtsi2sd(Operand dest, Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2sd, type);
+ }
+
+ public void Cvtsi2ss(Operand dest, Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2ss, type);
+ }
+
+ public void Cvtss2sd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtss2sd);
+ }
+
+ public void Div(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Div);
+ }
+
+ public void Divsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Divsd);
+ }
+
+ public void Divss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Divss);
+ }
+
+ public void Idiv(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Idiv);
+ }
+
+ public void Imul(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Imul128);
+ }
+
+ public void Imul(Operand dest, Operand source, OperandType type)
+ {
+ if (source.Kind != OperandKind.Register)
+ {
+ throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+ }
+
+ WriteInstruction(dest, source, type, X86Instruction.Imul);
+ }
+
+ public void Imul(Operand dest, Operand src1, Operand src2, OperandType type)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Imul];
+
+ if (src2.Kind != OperandKind.Constant)
+ {
+ throw new ArgumentException($"Invalid source 2 operand kind \"{src2.Kind}\".");
+ }
+
+ if (IsImm8(src2.Value, src2.Type) && info.OpRMImm8 != BadOp)
+ {
+ WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm8, rrm: true);
+
+ WriteByte(src2.AsByte());
+ }
+ else if (IsImm32(src2.Value, src2.Type) && info.OpRMImm32 != BadOp)
+ {
+ WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm32, rrm: true);
+
+ WriteInt32(src2.AsInt32());
+ }
+ else
+ {
+ throw new ArgumentException($"Failed to encode constant 0x{src2.Value:X}.");
+ }
+ }
+
+ public void Insertps(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Insertps);
+
+ WriteByte(imm);
+ }
+
+ public void Jcc(X86Condition condition, Operand dest)
+ {
+ if (dest.Kind == OperandKind.Label)
+ {
+ _jumps.Add(new Jump(condition, dest, _stream.Position));
+
+ // ReservedBytesForJump
+ WriteByte(0);
+ }
+ else
+ {
+ throw new ArgumentException("Destination operand must be of kind Label", nameof(dest));
+ }
+ }
+
+ public void Jcc(X86Condition condition, long offset)
+ {
+ if (ConstFitsOnS8(offset))
+ {
+ WriteByte((byte)(0x70 | (int)condition));
+
+ WriteByte((byte)offset);
+ }
+ else if (ConstFitsOnS32(offset))
+ {
+ WriteByte(0x0f);
+ WriteByte((byte)(0x80 | (int)condition));
+
+ WriteInt32((int)offset);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
+
+ public void Jmp(long offset)
+ {
+ if (ConstFitsOnS8(offset))
+ {
+ WriteByte(0xeb);
+
+ WriteByte((byte)offset);
+ }
+ else if (ConstFitsOnS32(offset))
+ {
+ WriteByte(0xe9);
+
+ WriteInt32((int)offset);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
+
+ public void Jmp(Operand dest)
+ {
+ if (dest.Kind == OperandKind.Label)
+ {
+ _jumps.Add(new Jump(dest, _stream.Position));
+
+ // ReservedBytesForJump
+ WriteByte(0);
+ }
+ else
+ {
+ WriteInstruction(dest, default, OperandType.None, X86Instruction.Jmp);
+ }
+ }
+
+ public void Ldmxcsr(Operand dest)
+ {
+ WriteInstruction(dest, default, OperandType.I32, X86Instruction.Ldmxcsr);
+ }
+
+ public void Lea(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Lea);
+ }
+
+ public void LockOr(Operand dest, Operand source, OperandType type)
+ {
+ WriteByte(LockPrefix);
+ WriteInstruction(dest, source, type, X86Instruction.Or);
+ }
+
+ public void Mov(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Mov);
+ }
+
+ public void Mov16(Operand dest, Operand source)
+ {
+ WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov16);
+ }
+
+ public void Mov8(Operand dest, Operand source)
+ {
+ WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov8);
+ }
+
+ public void Movd(Operand dest, Operand source)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd];
+
+ if (source.Type.IsInteger() || source.Kind == OperandKind.Memory)
+ {
+ WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRRM, rrm: true);
+ }
+ else
+ {
+ WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRMR);
+ }
+ }
+
+ public void Movdqu(Operand dest, Operand source)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Movdqu);
+ }
+
+ public void Movhlps(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movhlps);
+ }
+
+ public void Movlhps(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movlhps);
+ }
+
+ public void Movq(Operand dest, Operand source)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd];
+
+ InstructionFlags flags = info.Flags | InstructionFlags.RexW;
+
+ if (source.Type.IsInteger() || source.Kind == OperandKind.Memory)
+ {
+ WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRRM, rrm: true);
+ }
+ else if (dest.Type.IsInteger() || dest.Kind == OperandKind.Memory)
+ {
+ WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRMR);
+ }
+ else
+ {
+ WriteInstruction(dest, source, OperandType.None, X86Instruction.Movq);
+ }
+ }
+
+ public void Movsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movsd);
+ }
+
+ public void Movss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movss);
+ }
+
+ public void Movsx16(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movsx16);
+ }
+
+ public void Movsx32(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movsx32);
+ }
+
+ public void Movsx8(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movsx8);
+ }
+
+ public void Movzx16(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movzx16);
+ }
+
+ public void Movzx8(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movzx8);
+ }
+
+ public void Mul(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Mul128);
+ }
+
+ public void Mulsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Mulsd);
+ }
+
+ public void Mulss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Mulss);
+ }
+
+ public void Neg(Operand dest)
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Neg);
+ }
+
+ public void Not(Operand dest)
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Not);
+ }
+
+ public void Or(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Or);
+ }
+
+ public void Pclmulqdq(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pclmulqdq);
+
+ WriteByte(imm);
+ }
+
+ public void Pcmpeqw(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pcmpeqw);
+ }
+
+ public void Pextrb(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrb);
+
+ WriteByte(imm);
+ }
+
+ public void Pextrd(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrd);
+
+ WriteByte(imm);
+ }
+
+ public void Pextrq(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrq);
+
+ WriteByte(imm);
+ }
+
+ public void Pextrw(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrw);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrb(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrb);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrd(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrd);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrq(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrq);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrw(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrw);
+
+ WriteByte(imm);
+ }
+
+ public void Pop(Operand dest)
+ {
+ if (dest.Kind == OperandKind.Register)
+ {
+ WriteCompactInst(dest, 0x58);
+ }
+ else
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Pop);
+ }
+ }
+
+ public void Popcnt(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Popcnt);
+ }
+
+ public void Pshufd(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pshufd);
+
+ WriteByte(imm);
+ }
+
+ public void Push(Operand source)
+ {
+ if (source.Kind == OperandKind.Register)
+ {
+ WriteCompactInst(source, 0x50);
+ }
+ else
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Push);
+ }
+ }
+
+ public void Return()
+ {
+ WriteByte(0xc3);
+ }
+
+ public void Ror(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Ror);
+ }
+
+ public void Sar(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Sar);
+ }
+
+ public void Shl(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Shl);
+ }
+
+ public void Shr(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Shr);
+ }
+
+ public void Setcc(Operand dest, X86Condition condition)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Setcc];
+
+ WriteOpCode(dest, default, default, OperandType.None, info.Flags, info.OpRRM | (int)condition);
+ }
+
+ public void Stmxcsr(Operand dest)
+ {
+ WriteInstruction(dest, default, OperandType.I32, X86Instruction.Stmxcsr);
+ }
+
+ public void Sub(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Sub);
+ }
+
+ public void Subsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Subsd);
+ }
+
+ public void Subss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Subss);
+ }
+
+ public void Test(Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(src1, src2, type, X86Instruction.Test);
+ }
+
+ public void Xor(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Xor);
+ }
+
+ public void Xorps(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Xorps);
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand source,
+ OperandType type = OperandType.None)
+ {
+ WriteInstruction(dest, default, source, inst, type);
+ }
+
+ public void WriteInstruction(X86Instruction inst, Operand dest, Operand src1, Operand src2)
+ {
+ if (src2.Kind == OperandKind.Constant)
+ {
+ WriteInstruction(src1, dest, src2, inst);
+ }
+ else
+ {
+ WriteInstruction(dest, src1, src2, inst);
+ }
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ OperandType type)
+ {
+ WriteInstruction(dest, src1, src2, inst, type);
+ }
+
+ public void WriteInstruction(X86Instruction inst, Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, inst);
+
+ WriteByte(imm);
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ Operand src3)
+ {
+ // 3+ operands can only be encoded with the VEX encoding scheme.
+ Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
+
+ WriteInstruction(dest, src1, src2, inst);
+
+ WriteByte((byte)(src3.AsByte() << 4));
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ byte imm)
+ {
+ WriteInstruction(dest, src1, src2, inst);
+
+ WriteByte(imm);
+ }
+
+ private void WriteShiftInst(Operand dest, Operand source, OperandType type, X86Instruction inst)
+ {
+ if (source.Kind == OperandKind.Register)
+ {
+ X86Register shiftReg = (X86Register)source.GetRegister().Index;
+
+ Debug.Assert(shiftReg == X86Register.Rcx, $"Invalid shift register \"{shiftReg}\".");
+
+ source = default;
+ }
+ else if (source.Kind == OperandKind.Constant)
+ {
+ source = Operand.Factory.Const((int)source.Value & (dest.Type == OperandType.I32 ? 0x1f : 0x3f));
+ }
+
+ WriteInstruction(dest, source, type, inst);
+ }
+
+ private void WriteInstruction(Operand dest, Operand source, OperandType type, X86Instruction inst)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)inst];
+
+ if (source != default)
+ {
+ if (source.Kind == OperandKind.Constant)
+ {
+ ulong imm = source.Value;
+
+ if (inst == X86Instruction.Mov8)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8);
+
+ WriteByte((byte)imm);
+ }
+ else if (inst == X86Instruction.Mov16)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32);
+
+ WriteInt16((short)imm);
+ }
+ else if (IsImm8(imm, type) && info.OpRMImm8 != BadOp)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8);
+
+ WriteByte((byte)imm);
+ }
+ else if (!source.Relocatable && IsImm32(imm, type) && info.OpRMImm32 != BadOp)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32);
+
+ WriteInt32((int)imm);
+ }
+ else if (dest != default && dest.Kind == OperandKind.Register && info.OpRImm64 != BadOp)
+ {
+ int rexPrefix = GetRexPrefix(dest, source, type, rrm: false);
+
+ if (rexPrefix != 0)
+ {
+ WriteByte((byte)rexPrefix);
+ }
+
+ WriteByte((byte)(info.OpRImm64 + (dest.GetRegister().Index & 0b111)));
+
+ if (HasRelocs && source.Relocatable)
+ {
+ _relocs.Add(new Reloc
+ {
+ JumpIndex = _jumps.Count - 1,
+ Position = (int)_stream.Position,
+ Symbol = source.Symbol,
+ });
+ }
+
+ WriteUInt64(imm);
+ }
+ else
+ {
+ throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
+ }
+ }
+ else if (source.Kind == OperandKind.Register && info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR);
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+ }
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else if (info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR);
+ }
+ else
+ {
+ throw new ArgumentNullException(nameof(source));
+ }
+ }
+
+ private void WriteInstruction(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ X86Instruction inst,
+ OperandType type = OperandType.None)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)inst];
+
+ if (src2 != default)
+ {
+ if (src2.Kind == OperandKind.Constant)
+ {
+ ulong imm = src2.Value;
+
+ if ((byte)imm == imm && info.OpRMImm8 != BadOp)
+ {
+ WriteOpCode(dest, src1, default, type, info.Flags, info.OpRMImm8);
+
+ WriteByte((byte)imm);
+ }
+ else
+ {
+ throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
+ }
+ }
+ else if (src2.Kind == OperandKind.Register && info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\".");
+ }
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else if (info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
+ }
+ else
+ {
+ throw new ArgumentNullException(nameof(src2));
+ }
+ }
+
+ private void WriteOpCode(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ OperandType type,
+ InstructionFlags flags,
+ int opCode,
+ bool rrm = false)
+ {
+ int rexPrefix = GetRexPrefix(dest, src2, type, rrm);
+
+ if ((flags & InstructionFlags.RexW) != 0)
+ {
+ rexPrefix |= RexWPrefix;
+ }
+
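+ // The bits of the opcode constant above OpModRMBits hold the ModRM reg-field
+ // extension (the "/digit"), which is shifted into bits 3-5 of the ModRM byte.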
+ int modRM = (opCode >> OpModRMBits) << 3;
+
+ MemoryOperand memOp = default;
+ bool hasMemOp = false;
+
+ if (dest != default)
+ {
+ if (dest.Kind == OperandKind.Register)
+ {
+ int regIndex = dest.GetRegister().Index;
+
+ modRM |= (regIndex & 0b111) << (rrm ? 3 : 0);
+
+ if ((flags & InstructionFlags.Reg8Dest) != 0 && regIndex >= 4)
+ {
+ rexPrefix |= RexPrefix;
+ }
+ }
+ else if (dest.Kind == OperandKind.Memory)
+ {
+ memOp = dest.GetMemory();
+ hasMemOp = true;
+ }
+ else
+ {
+ throw new ArgumentException("Invalid destination operand kind \"" + dest.Kind + "\".");
+ }
+ }
+
+ if (src2 != default)
+ {
+ if (src2.Kind == OperandKind.Register)
+ {
+ int regIndex = src2.GetRegister().Index;
+
+ modRM |= (regIndex & 0b111) << (rrm ? 0 : 3);
+
+ if ((flags & InstructionFlags.Reg8Src) != 0 && regIndex >= 4)
+ {
+ rexPrefix |= RexPrefix;
+ }
+ }
+ else if (src2.Kind == OperandKind.Memory && !hasMemOp)
+ {
+ memOp = src2.GetMemory();
+ hasMemOp = true;
+ }
+ else
+ {
+ throw new ArgumentException("Invalid source operand kind \"" + src2.Kind + "\".");
+ }
+ }
+
+ bool needsSibByte = false;
+ bool needsDisplacement = false;
+
+ int sib = 0;
+
+ if (hasMemOp)
+ {
+ // Either source or destination is a memory operand.
+ Register baseReg = memOp.BaseAddress.GetRegister();
+
+ X86Register baseRegLow = (X86Register)(baseReg.Index & 0b111);
+
+ needsSibByte = memOp.Index != default || baseRegLow == X86Register.Rsp;
+ needsDisplacement = memOp.Displacement != 0 || baseRegLow == X86Register.Rbp;
+
+ if (needsDisplacement)
+ {
+ if (ConstFitsOnS8(memOp.Displacement))
+ {
+ modRM |= 0x40;
+ }
+ else /* if (ConstFitsOnS32(memOp.Displacement)) */
+ {
+ modRM |= 0x80;
+ }
+ }
+
+ if (baseReg.Index >= 8)
+ {
+ Debug.Assert((uint)baseReg.Index <= MaxRegNumber);
+
+ rexPrefix |= RexPrefix | (baseReg.Index >> 3);
+ }
+
+ if (needsSibByte)
+ {
+ sib = (int)baseRegLow;
+
+ if (memOp.Index != default)
+ {
+ int indexReg = memOp.Index.GetRegister().Index;
+
+ Debug.Assert(indexReg != (int)X86Register.Rsp, "Using RSP as index register on the memory operand is not allowed.");
+
+ if (indexReg >= 8)
+ {
+ Debug.Assert((uint)indexReg <= MaxRegNumber);
+
+ rexPrefix |= RexPrefix | (indexReg >> 3) << 1;
+ }
+
+ sib |= (indexReg & 0b111) << 3;
+ }
+ else
+ {
+ sib |= 0b100 << 3;
+ }
+
+ sib |= (int)memOp.Scale << 6;
+
+ modRM |= 0b100;
+ }
+ else
+ {
+ modRM |= (int)baseRegLow;
+ }
+ }
+ else
+ {
+ // Source and destination are registers.
+ modRM |= 0xc0;
+ }
+
+ Debug.Assert(opCode != BadOp, "Invalid opcode value.");
+
+ if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding)
+ {
+ WriteEvexInst(dest, src1, src2, type, flags, opCode);
+
+ opCode &= 0xff;
+ }
+ else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
+ {
+ // In a VEX encoding, only one legacy prefix can be active at a time. The active prefix is encoded in the second byte using two bits.
+
+ int vexByte2 = (flags & InstructionFlags.PrefixMask) switch
+ {
+ InstructionFlags.Prefix66 => 1,
+ InstructionFlags.PrefixF3 => 2,
+ InstructionFlags.PrefixF2 => 3,
+ _ => 0,
+ };
+
+ if (src1 != default)
+ {
+ vexByte2 |= (src1.GetRegister().Index ^ 0xf) << 3;
+ }
+ else
+ {
+ vexByte2 |= 0b1111 << 3;
+ }
+
+ ushort opCodeHigh = (ushort)(opCode >> 8);
+
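+ // The compact two-byte VEX form (0xc5) can only encode the 0F opcode map and
+ // cannot carry the REX.W/X/B bits, so fall back to the three-byte form (0xc4) otherwise.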
+ if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf)
+ {
+ // Two-byte form.
+ WriteByte(0xc5);
+
+ vexByte2 |= (~rexPrefix & 4) << 5;
+
+ WriteByte((byte)vexByte2);
+ }
+ else
+ {
+ // Three-byte form.
+ WriteByte(0xc4);
+
+ int vexByte1 = (~rexPrefix & 7) << 5;
+
+ switch (opCodeHigh)
+ {
+ case 0xf:
+ vexByte1 |= 1;
+ break;
+ case 0xf38:
+ vexByte1 |= 2;
+ break;
+ case 0xf3a:
+ vexByte1 |= 3;
+ break;
+
+ default:
+ Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}.");
+ break;
+ }
+
+ vexByte2 |= (rexPrefix & 8) << 4;
+
+ WriteByte((byte)vexByte1);
+ WriteByte((byte)vexByte2);
+ }
+
+ opCode &= 0xff;
+ }
+ else
+ {
+ if (flags.HasFlag(InstructionFlags.Prefix66))
+ {
+ WriteByte(0x66);
+ }
+
+ if (flags.HasFlag(InstructionFlags.PrefixF2))
+ {
+ WriteByte(0xf2);
+ }
+
+ if (flags.HasFlag(InstructionFlags.PrefixF3))
+ {
+ WriteByte(0xf3);
+ }
+
+ if (rexPrefix != 0)
+ {
+ WriteByte((byte)rexPrefix);
+ }
+ }
+
+ if (dest != default && (flags & InstructionFlags.RegOnly) != 0)
+ {
+ opCode += dest.GetRegister().Index & 7;
+ }
+
+ if ((opCode & 0xff0000) != 0)
+ {
+ WriteByte((byte)(opCode >> 16));
+ }
+
+ if ((opCode & 0xff00) != 0)
+ {
+ WriteByte((byte)(opCode >> 8));
+ }
+
+ WriteByte((byte)opCode);
+
+ if ((flags & InstructionFlags.RegOnly) == 0)
+ {
+ WriteByte((byte)modRM);
+
+ if (needsSibByte)
+ {
+ WriteByte((byte)sib);
+ }
+
+ if (needsDisplacement)
+ {
+ if (ConstFitsOnS8(memOp.Displacement))
+ {
+ WriteByte((byte)memOp.Displacement);
+ }
+ else /* if (ConstFitsOnS32(memOp.Displacement)) */
+ {
+ WriteInt32(memOp.Displacement);
+ }
+ }
+ }
+ }
+
+ private void WriteEvexInst(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ OperandType type,
+ InstructionFlags flags,
+ int opCode,
+ bool broadcast = false,
+ int registerWidth = 128,
+ int maskRegisterIdx = 0,
+ bool zeroElements = false)
+ {
+ int op1Idx = dest.GetRegister().Index;
+ int op2Idx = src1.GetRegister().Index;
+ int op3Idx = src2.GetRegister().Index;
+
+ WriteByte(0x62);
+
+ // P0
+ // Extend operand 1 register
+ bool r = (op1Idx & 8) == 0;
+ // Extend operand 3 register
+ bool x = (op3Idx & 16) == 0;
+ // Extend operand 3 register
+ bool b = (op3Idx & 8) == 0;
+ // Extend operand 1 register
+ bool rp = (op1Idx & 16) == 0;
+ // Escape code index
+ byte mm = 0b00;
+
+ switch ((ushort)(opCode >> 8))
+ {
+ case 0xf00:
+ mm = 0b01;
+ break;
+ case 0xf38:
+ mm = 0b10;
+ break;
+ case 0xf3a:
+ mm = 0b11;
+ break;
+
+ default:
+ Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}.");
+ break;
+ }
+
+ WriteByte(
+ (byte)(
+ (r ? 0x80 : 0) |
+ (x ? 0x40 : 0) |
+ (b ? 0x20 : 0) |
+ (rp ? 0x10 : 0) |
+ mm));
+
+ // P1
+ // Specify 64-bit lane mode
+ bool w = Is64Bits(type);
+ // Operand 2 register index
+ byte vvvv = (byte)(~op2Idx & 0b1111);
+ // Opcode prefix
+ byte pp = (flags & InstructionFlags.PrefixMask) switch
+ {
+ InstructionFlags.Prefix66 => 0b01,
+ InstructionFlags.PrefixF3 => 0b10,
+ InstructionFlags.PrefixF2 => 0b11,
+ _ => 0,
+ };
+ WriteByte(
+ (byte)(
+ (w ? 0x80 : 0) |
+ (vvvv << 3) |
+ 0b100 |
+ pp));
+
+ // P2
+ // Mask register determines what elements to zero, rather than what elements to merge
+ bool z = zeroElements;
+ // Specifies register-width
+ byte ll = 0b00;
+ switch (registerWidth)
+ {
+ case 128:
+ ll = 0b00;
+ break;
+ case 256:
+ ll = 0b01;
+ break;
+ case 512:
+ ll = 0b10;
+ break;
+
+ default:
+ Debug.Fail($"Invalid EVEX vector register width {registerWidth}.");
+ break;
+ }
+ // Embedded broadcast in the case of a memory operand
+ bool bcast = broadcast;
+ // Extend operand 2 register
+ bool vp = (op2Idx & 16) == 0;
+ // Mask register index
+ Debug.Assert(maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}.");
+ byte aaa = (byte)(maskRegisterIdx & 0b111);
+
+ WriteByte(
+ (byte)(
+ (z ? 0x80 : 0) |
+ (ll << 5) |
+ (bcast ? 0x10 : 0) |
+ (vp ? 8 : 0) |
+ aaa));
+ }
+
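+ // Writes a "+rd" style instruction, where the register index is encoded in
+ // the low three bits of the opcode itself; 0x41 is a REX.B prefix, needed to
+ // address registers 8-15.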
+ private void WriteCompactInst(Operand operand, int opCode)
+ {
+ int regIndex = operand.GetRegister().Index;
+
+ if (regIndex >= 8)
+ {
+ WriteByte(0x41);
+ }
+
+ WriteByte((byte)(opCode + (regIndex & 0b111)));
+ }
+
+ private static int GetRexPrefix(Operand dest, Operand source, OperandType type, bool rrm)
+ {
+ int rexPrefix = 0;
+
+ if (Is64Bits(type))
+ {
+ rexPrefix = RexWPrefix;
+ }
+
+ void SetRegisterHighBit(Register reg, int bit)
+ {
+ if (reg.Index >= 8)
+ {
+ rexPrefix |= RexPrefix | (reg.Index >> 3) << bit;
+ }
+ }
+
+ if (dest != default && dest.Kind == OperandKind.Register)
+ {
+ SetRegisterHighBit(dest.GetRegister(), rrm ? 2 : 0);
+ }
+
+ if (source != default && source.Kind == OperandKind.Register)
+ {
+ SetRegisterHighBit(source.GetRegister(), rrm ? 0 : 2);
+ }
+
+ return rexPrefix;
+ }
+
+ public (byte[], RelocInfo) GetCode()
+ {
+ var jumps = CollectionsMarshal.AsSpan(_jumps);
+ var relocs = CollectionsMarshal.AsSpan(_relocs);
+
+ // Write jump relative offsets.
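+ // The sizes are refined iteratively until they reach a fixed point, since
+ // shrinking one jump to its short form can bring other targets into rel8 range.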
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (int i = 0; i < jumps.Length; i++)
+ {
+ ref Jump jump = ref jumps[i];
+
+ // If jump target not resolved yet, resolve it.
+ jump.JumpTarget ??= _labels[jump.JumpLabel];
+
+ long jumpTarget = jump.JumpTarget.Value;
+ long offset = jumpTarget - jump.JumpPosition;
+
+ if (offset < 0)
+ {
+ for (int j = i - 1; j >= 0; j--)
+ {
+ ref Jump jump2 = ref jumps[j];
+
+ if (jump2.JumpPosition < jumpTarget)
+ {
+ break;
+ }
+
+ offset -= jump2.InstSize - ReservedBytesForJump;
+ }
+ }
+ else
+ {
+ for (int j = i + 1; j < jumps.Length; j++)
+ {
+ ref Jump jump2 = ref jumps[j];
+
+ if (jump2.JumpPosition >= jumpTarget)
+ {
+ break;
+ }
+
+ offset += jump2.InstSize - ReservedBytesForJump;
+ }
+
+ offset -= ReservedBytesForJump;
+ }
+
+ if (jump.IsConditional)
+ {
+ jump.InstSize = GetJccLength(offset);
+ }
+ else
+ {
+ jump.InstSize = GetJmpLength(offset);
+ }
+
+ // The jump is relative to the next instruction, not the current one.
+ // Since we didn't know the next instruction address when calculating
+ // the offset (as the size of the current jump instruction was not known),
+ // we now need to compensate the offset by the jump instruction size.
+ // It's also worth noting that:
+ // - This is only needed for backward jumps.
+ // - GetJmpLength and GetJccLength also compensate the offset
+ // internally when computing the jump instruction size.
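+ // For example, a backward conditional jump whose computed offset is -100 gets
+ // InstSize = 2 (the short form, since -100 - 2 = -102 still fits in a signed
+ // byte), and the displacement actually encoded becomes -102.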
+ if (offset < 0)
+ {
+ offset -= jump.InstSize;
+ }
+
+ if (jump.Offset != offset)
+ {
+ jump.Offset = offset;
+
+ modified = true;
+ }
+ }
+ }
+ while (modified);
+
+ // Write the code, ignoring the dummy bytes after jumps, into a new stream.
+ _stream.Seek(0, SeekOrigin.Begin);
+
+ using var codeStream = MemoryStreamManager.Shared.GetStream();
+ var assembler = new Assembler(codeStream, HasRelocs);
+
+ bool hasRelocs = HasRelocs;
+ int relocIndex = 0;
+ int relocOffset = 0;
+ var relocEntries = hasRelocs
+ ? new RelocEntry[relocs.Length]
+ : Array.Empty();
+
+ for (int i = 0; i < jumps.Length; i++)
+ {
+ ref Jump jump = ref jumps[i];
+
+ // If there are relocations, calculate their new positions, compensating for the final jump sizes.
+ if (hasRelocs)
+ {
+ relocOffset += jump.InstSize - ReservedBytesForJump;
+
+ for (; relocIndex < relocEntries.Length; relocIndex++)
+ {
+ ref Reloc reloc = ref relocs[relocIndex];
+
+ if (reloc.JumpIndex > i)
+ {
+ break;
+ }
+
+ relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol);
+ }
+ }
+
+ Span<byte> buffer = new byte[jump.JumpPosition - _stream.Position];
+
+ _stream.ReadExactly(buffer);
+ _stream.Seek(ReservedBytesForJump, SeekOrigin.Current);
+
+ codeStream.Write(buffer);
+
+ if (jump.IsConditional)
+ {
+ assembler.Jcc(jump.Condition, jump.Offset);
+ }
+ else
+ {
+ assembler.Jmp(jump.Offset);
+ }
+ }
+
+ // Write any remaining relocations, i.e. those placed after the last jump (or all of them, when no jumps were assembled).
+ for (; relocIndex < relocEntries.Length; relocIndex++)
+ {
+ ref Reloc reloc = ref relocs[relocIndex];
+
+ relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol);
+ }
+
+ _stream.CopyTo(codeStream);
+
+ var code = codeStream.ToArray();
+ var relocInfo = new RelocInfo(relocEntries);
+
+ return (code, relocInfo);
+ }
+
+ private static bool Is64Bits(OperandType type)
+ {
+ return type == OperandType.I64 || type == OperandType.FP64;
+ }
+
+ private static bool IsImm8(ulong immediate, OperandType type)
+ {
+ long value = type == OperandType.I32 ? (int)immediate : (long)immediate;
+
+ return ConstFitsOnS8(value);
+ }
+
+ private static bool IsImm32(ulong immediate, OperandType type)
+ {
+ long value = type == OperandType.I32 ? (int)immediate : (long)immediate;
+
+ return ConstFitsOnS32(value);
+ }
+
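+ // A conditional jump is either 2 bytes long (0x70+cc with a rel8 displacement)
+ // or 6 bytes long (0x0f 0x80+cc with a rel32 displacement). For backward jumps,
+ // the offset does not yet include the jump instruction itself, so each candidate
+ // size is subtracted before the range check.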
+ private static int GetJccLength(long offset)
+ {
+ if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset))
+ {
+ return 2;
+ }
+ else if (ConstFitsOnS32(offset < 0 ? offset - 6 : offset))
+ {
+ return 6;
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
+
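+ // An unconditional jump is either 2 bytes long (0xeb with a rel8 displacement)
+ // or 5 bytes long (0xe9 with a rel32 displacement).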
+ private static int GetJmpLength(long offset)
+ {
+ if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset))
+ {
+ return 2;
+ }
+ else if (ConstFitsOnS32(offset < 0 ? offset - 5 : offset))
+ {
+ return 5;
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
+
+ private static bool ConstFitsOnS8(long value)
+ {
+ return value == (sbyte)value;
+ }
+
+ private static bool ConstFitsOnS32(long value)
+ {
+ return value == (int)value;
+ }
+
+ private void WriteInt16(short value)
+ {
+ WriteUInt16((ushort)value);
+ }
+
+ private void WriteInt32(int value)
+ {
+ WriteUInt32((uint)value);
+ }
+
+ private void WriteByte(byte value)
+ {
+ _stream.WriteByte(value);
+ }
+
+ private void WriteUInt16(ushort value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ }
+
+ private void WriteUInt32(uint value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ }
+
+ private void WriteUInt64(ulong value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ _stream.WriteByte((byte)(value >> 32));
+ _stream.WriteByte((byte)(value >> 40));
+ _stream.WriteByte((byte)(value >> 48));
+ _stream.WriteByte((byte)(value >> 56));
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs b/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs
new file mode 100644
index 0000000..8910e88
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs
@@ -0,0 +1,299 @@
+using System;
+using System.Diagnostics.CodeAnalysis;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ partial class Assembler
+ {
+ public static bool SupportsVexPrefix(X86Instruction inst)
+ {
+ return _instTable[(int)inst].Flags.HasFlag(InstructionFlags.Vex);
+ }
+
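+ // Placeholder for encodings that do not exist for a given instruction.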
+ private const int BadOp = 0;
+
+ [Flags]
+ [SuppressMessage("Design", "CA1069: Enums values should not be duplicated")]
+ private enum InstructionFlags
+ {
+ None = 0,
+ RegOnly = 1 << 0,
+ Reg8Src = 1 << 1,
+ Reg8Dest = 1 << 2,
+ RexW = 1 << 3,
+ Vex = 1 << 4,
+ Evex = 1 << 5,
+
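+ // The mandatory 0x66, 0xf3 or 0xf2 opcode prefix, if any, is stored in bits 16-18.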
+ PrefixBit = 16,
+ PrefixMask = 7 << PrefixBit,
+ Prefix66 = 1 << PrefixBit,
+ PrefixF3 = 2 << PrefixBit,
+ PrefixF2 = 4 << PrefixBit,
+ }
+
+ private readonly struct InstructionInfo
+ {
+ public int OpRMR { get; }
+ public int OpRMImm8 { get; }
+ public int OpRMImm32 { get; }
+ public int OpRImm64 { get; }
+ public int OpRRM { get; }
+
+ public InstructionFlags Flags { get; }
+
+ public InstructionInfo(
+ int opRMR,
+ int opRMImm8,
+ int opRMImm32,
+ int opRImm64,
+ int opRRM,
+ InstructionFlags flags)
+ {
+ OpRMR = opRMR;
+ OpRMImm8 = opRMImm8;
+ OpRMImm32 = opRMImm32;
+ OpRImm64 = opRImm64;
+ OpRRM = opRRM;
+ Flags = flags;
+ }
+ }
+
+ private static readonly InstructionInfo[] _instTable;
+
+ static Assembler()
+ {
+ _instTable = new InstructionInfo[(int)X86Instruction.Count];
+
+#pragma warning disable IDE0055 // Disable formatting
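+ // Each opcode constant packs up to three opcode bytes (including any 0x0f
+ // escape bytes) into its low 24 bits; bits 24-26 hold the ModRM.reg extension
+ // (the /digit) for opcodes that require one, e.g. 0x04000083 is "0x83 /4".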
+ // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags
+ Add(X86Instruction.Add, new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstructionFlags.None));
+ Add(X86Instruction.Addpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex));
+ Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Aesdec, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38de, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesdeclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38df, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesenc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesenclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesimc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38db, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None));
+ Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex));
+ Add(X86Instruction.Andpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f54, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Andps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f54, InstructionFlags.Vex));
+ Add(X86Instruction.Blendvpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3815, InstructionFlags.Prefix66));
+ Add(X86Instruction.Blendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3814, InstructionFlags.Prefix66));
+ Add(X86Instruction.Bsr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstructionFlags.None));
+ Add(X86Instruction.Bswap, new InstructionInfo(0x00000fc8, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RegOnly));
+ Add(X86Instruction.Call, new InstructionInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Cmovcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstructionFlags.None));
+ Add(X86Instruction.Cmp, new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstructionFlags.None));
+ Add(X86Instruction.Cmppd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cmpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex));
+ Add(X86Instruction.Cmpsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cmpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cmpxchg, new InstructionInfo(0x00000fb1, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW));
+ Add(X86Instruction.Cmpxchg8, new InstructionInfo(0x00000fb0, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex));
+ Add(X86Instruction.Crc32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2));
+ Add(X86Instruction.Crc32_16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2 | InstructionFlags.Prefix66));
+ Add(X86Instruction.Crc32_8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f0, InstructionFlags.PrefixF2 | InstructionFlags.Reg8Src));
+ Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex));
+ Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtpd2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cvtps2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cvtps2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex));
+ Add(X86Instruction.Cvtsd2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsd2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsi2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsi2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtss2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtss2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Div, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstructionFlags.None));
+ Add(X86Instruction.Divpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Divps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex));
+ Add(X86Instruction.Divsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Divss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Gf2p8affineqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3ace, InstructionFlags.Prefix66));
+ Add(X86Instruction.Haddpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Haddps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Idiv, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstructionFlags.None));
+ Add(X86Instruction.Imul, new InstructionInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstructionFlags.None));
+ Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None));
+ Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Jmp, new InstructionInfo(0x040000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Ldmxcsr, new InstructionInfo(0x02000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex));
+ Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None));
+ Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex));
+ Add(X86Instruction.Maxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Maxss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Minpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Minps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex));
+ Add(X86Instruction.Minsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Minss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Mov, new InstructionInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None));
+ Add(X86Instruction.Mov16, new InstructionInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstructionFlags.Prefix66));
+ Add(X86Instruction.Mov8, new InstructionInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstructionFlags.Reg8Src | InstructionFlags.Reg8Dest));
+ Add(X86Instruction.Movd, new InstructionInfo(0x00000f7e, BadOp, BadOp, BadOp, 0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Movdqu, new InstructionInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movhlps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstructionFlags.Vex));
+ Add(X86Instruction.Movlhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstructionFlags.Vex));
+ Add(X86Instruction.Movq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movsd, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Movss, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movsx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstructionFlags.None));
+ Add(X86Instruction.Movsx32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstructionFlags.None));
+ Add(X86Instruction.Movsx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Movzx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstructionFlags.None));
+ Add(X86Instruction.Movzx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Mul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstructionFlags.None));
+ Add(X86Instruction.Mulpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Mulps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex));
+ Add(X86Instruction.Mulsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Mulss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Neg, new InstructionInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Not, new InstructionInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Or, new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstructionFlags.None));
+ Add(X86Instruction.Paddb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Palignr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66));
+ Add(X86Instruction.Pclmulqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a44, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrb, new InstructionInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrd, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrq, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmulld, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmullw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pop, new InstructionInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Popcnt, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstructionFlags.PrefixF3));
+ Add(X86Instruction.Por, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pshufb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pshufd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pslld, new InstructionInfo(BadOp, 0x06000f72, BadOp, BadOp, 0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pslldq, new InstructionInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psllq, new InstructionInfo(BadOp, 0x06000f73, BadOp, BadOp, 0x00000ff3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psllw, new InstructionInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrad, new InstructionInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psraw, new InstructionInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrld, new InstructionInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrlq, new InstructionInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrldq, new InstructionInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrlw, new InstructionInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckldq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Push, new InstructionInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstructionFlags.None));
+ Add(X86Instruction.Pxor, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Rcpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex));
+ Add(X86Instruction.Rcpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Ror, new InstructionInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Roundpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Rsqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex));
+ Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest));
+ Add(X86Instruction.Sha256Msg1, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cc, InstructionFlags.None));
+ Add(X86Instruction.Sha256Msg2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cd, InstructionFlags.None));
+ Add(X86Instruction.Sha256Rnds2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cb, InstructionFlags.None));
+ Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Shufps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex));
+ Add(X86Instruction.Sqrtpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex));
+ Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Stmxcsr, new InstructionInfo(0x03000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex));
+ Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None));
+ Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex));
+ Add(X86Instruction.Subsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Subss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Test, new InstructionInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Unpckhpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex));
+ Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex));
+ Add(X86Instruction.Vblendvpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfnmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfnmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfnmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfnmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vpternlogd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a25, InstructionFlags.Evex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
+ Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex));
+#pragma warning restore IDE0055
+
+ static void Add(X86Instruction inst, in InstructionInfo info)
+ {
+ _instTable[(int)inst] = info;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/CallConvName.cs b/src/ARMeilleure/CodeGen/X86/CallConvName.cs
new file mode 100644
index 0000000..6208da1
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CallConvName.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum CallConvName
+ {
+ SystemV,
+ Windows,
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/CallingConvention.cs b/src/ARMeilleure/CodeGen/X86/CallingConvention.cs
new file mode 100644
index 0000000..8433aae
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CallingConvention.cs
@@ -0,0 +1,170 @@
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class CallingConvention
+ {
+ private const int RegistersMask = 0xffff;
+
+ public static int GetIntAvailableRegisters()
+ {
+ return RegistersMask & ~(1 << (int)X86Register.Rsp);
+ }
+
+ public static int GetVecAvailableRegisters()
+ {
+ return RegistersMask;
+ }
+
+ public static int GetIntCallerSavedRegisters()
+ {
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+#pragma warning disable IDE0055 // Disable formatting
+ return (1 << (int)X86Register.Rax) |
+ (1 << (int)X86Register.Rcx) |
+ (1 << (int)X86Register.Rdx) |
+ (1 << (int)X86Register.R8) |
+ (1 << (int)X86Register.R9) |
+ (1 << (int)X86Register.R10) |
+ (1 << (int)X86Register.R11);
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ return (1 << (int)X86Register.Rax) |
+ (1 << (int)X86Register.Rcx) |
+ (1 << (int)X86Register.Rdx) |
+ (1 << (int)X86Register.Rsi) |
+ (1 << (int)X86Register.Rdi) |
+ (1 << (int)X86Register.R8) |
+ (1 << (int)X86Register.R9) |
+ (1 << (int)X86Register.R10) |
+ (1 << (int)X86Register.R11);
+#pragma warning restore IDE0055
+ }
+ }
+
+ public static int GetVecCallerSavedRegisters()
+ {
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+ return (1 << (int)X86Register.Xmm0) |
+ (1 << (int)X86Register.Xmm1) |
+ (1 << (int)X86Register.Xmm2) |
+ (1 << (int)X86Register.Xmm3) |
+ (1 << (int)X86Register.Xmm4) |
+ (1 << (int)X86Register.Xmm5);
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ return RegistersMask;
+ }
+ }
+
+ public static int GetIntCalleeSavedRegisters()
+ {
+ return GetIntCallerSavedRegisters() ^ RegistersMask;
+ }
+
+ public static int GetVecCalleeSavedRegisters()
+ {
+ return GetVecCallerSavedRegisters() ^ RegistersMask;
+ }
+
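+ // Windows x64 passes the first four arguments, integer or vector, in registers.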
+ public static int GetArgumentsOnRegsCount()
+ {
+ return 4;
+ }
+
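+ // System V passes up to six integer arguments in registers (rdi, rsi, rdx, rcx, r8, r9).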
+ public static int GetIntArgumentsOnRegsCount()
+ {
+ return 6;
+ }
+
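+ // System V passes up to eight vector arguments in registers (xmm0-xmm7).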
+ public static int GetVecArgumentsOnRegsCount()
+ {
+ return 8;
+ }
+
+ public static X86Register GetIntArgumentRegister(int index)
+ {
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+ switch (index)
+ {
+ case 0:
+ return X86Register.Rcx;
+ case 1:
+ return X86Register.Rdx;
+ case 2:
+ return X86Register.R8;
+ case 3:
+ return X86Register.R9;
+ }
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ switch (index)
+ {
+ case 0:
+ return X86Register.Rdi;
+ case 1:
+ return X86Register.Rsi;
+ case 2:
+ return X86Register.Rdx;
+ case 3:
+ return X86Register.Rcx;
+ case 4:
+ return X86Register.R8;
+ case 5:
+ return X86Register.R9;
+ }
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static X86Register GetVecArgumentRegister(int index)
+ {
+ int count;
+
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+ count = 4;
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ count = 8;
+ }
+
+ if ((uint)index < count)
+ {
+ return X86Register.Xmm0 + index;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static X86Register GetIntReturnRegister()
+ {
+ return X86Register.Rax;
+ }
+
+ public static X86Register GetIntReturnRegisterHigh()
+ {
+ return X86Register.Rdx;
+ }
+
+ public static X86Register GetVecReturnRegister()
+ {
+ return X86Register.Xmm0;
+ }
+
+ public static CallConvName GetCurrentCallConv()
+ {
+ return OperatingSystem.IsWindows()
+ ? CallConvName.Windows
+ : CallConvName.SystemV;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs b/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs
new file mode 100644
index 0000000..ae83ea8
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs
@@ -0,0 +1,19 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class CodeGenCommon
+ {
+ public static bool IsLongConst(Operand op)
+ {
+ long value = op.Type == OperandType.I32 ? op.AsInt32() : op.AsInt64();
+
+ return !ConstFitsOnS32(value);
+ }
+
+ private static bool ConstFitsOnS32(long value)
+ {
+ return value == (int)value;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs
new file mode 100644
index 0000000..d4d4c20
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs
@@ -0,0 +1,105 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using Ryujinx.Common.Memory;
+using System.IO;
+using System.Numerics;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class CodeGenContext
+ {
+ private readonly Stream _stream;
+ private readonly Operand[] _blockLabels;
+
+ public int StreamOffset => (int)_stream.Length;
+
+ public AllocationResult AllocResult { get; }
+
+ public Assembler Assembler { get; }
+ public BasicBlock CurrBlock { get; private set; }
+
+ public int CallArgsRegionSize { get; }
+ public int XmmSaveRegionSize { get; }
+
+ public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
+ {
+ _stream = MemoryStreamManager.Shared.GetStream();
+ _blockLabels = new Operand[blocksCount];
+
+ AllocResult = allocResult;
+ Assembler = new Assembler(_stream, relocatable);
+
+ CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int xmmSaveRegionSize);
+ XmmSaveRegionSize = xmmSaveRegionSize;
+ }
+
+ private static int GetCallArgsRegionSize(AllocationResult allocResult, int maxCallArgs, out int xmmSaveRegionSize)
+ {
+ // We need to add 8 bytes to the total size, as the call to this function already pushed 8 bytes (the
+ // return address).
+ int intMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters;
+ int vecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters;
+
+ xmmSaveRegionSize = BitOperations.PopCount((uint)vecMask) * 16;
+
+ int calleeSaveRegionSize = BitOperations.PopCount((uint)intMask) * 8 + xmmSaveRegionSize + 8;
+
+ int argsCount = maxCallArgs;
+
+ if (argsCount < 0)
+ {
+ // When the function has no calls, argsCount is -1. In this case, we don't need to allocate the shadow
+ // space.
+ argsCount = 0;
+ }
+ else if (argsCount < 4)
+ {
+ // The ABI mandates that the space for at least 4 arguments is reserved on the stack (this is called
+ // shadow space).
+ argsCount = 4;
+ }
+
+ // TODO: Align XMM save region to 16 bytes because unwinding on Windows requires it.
+ int frameSize = calleeSaveRegionSize + allocResult.SpillRegionSize;
+
+ // TODO: Instead of always multiplying by 16 (the largest possible size of a variable, since a V128 has 16
+ // bytes), we should calculate the exact size consumed by the arguments passed to the called functions on
+ // the stack.
+ int callArgsAndFrameSize = frameSize + argsCount * 16;
+
+ // Ensure that the Stack Pointer will be aligned to 16 bytes.
+ callArgsAndFrameSize = (callArgsAndFrameSize + 0xf) & ~0xf;
+
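+ // For example, with frameSize = 0x28 and argsCount = 4, the total is
+ // 0x28 + 4 * 16 = 0x68, which aligns up to 0x70, so 0x48 bytes are reserved
+ // for the call arguments region.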
+ return callArgsAndFrameSize - frameSize;
+ }
+
+ public void EnterBlock(BasicBlock block)
+ {
+ Assembler.MarkLabel(GetLabel(block));
+
+ CurrBlock = block;
+ }
+
+ public void JumpTo(BasicBlock target)
+ {
+ Assembler.Jmp(GetLabel(target));
+ }
+
+ public void JumpTo(X86Condition condition, BasicBlock target)
+ {
+ Assembler.Jcc(condition, GetLabel(target));
+ }
+
+ private Operand GetLabel(BasicBlock block)
+ {
+ ref Operand label = ref _blockLabels[block.Index];
+
+ if (label == default)
+ {
+ label = Operand.Factory.Label();
+ }
+
+ return label;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs
new file mode 100644
index 0000000..9e94a07
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -0,0 +1,1891 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class CodeGenerator
+ {
+ private const int RegistersCount = 16;
+ private const int PageSize = 0x1000;
+ private const int StackGuardSize = 0x2000;
+
+ private static readonly Action<CodeGenContext, Operation>[] _instTable;
+
+ static CodeGenerator()
+ {
+ _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];
+
+#pragma warning disable IDE0055 // Disable formatting
+ Add(Instruction.Add, GenerateAdd);
+ Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+ Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+ Add(Instruction.BitwiseNot, GenerateBitwiseNot);
+ Add(Instruction.BitwiseOr, GenerateBitwiseOr);
+ Add(Instruction.BranchIf, GenerateBranchIf);
+ Add(Instruction.ByteSwap, GenerateByteSwap);
+ Add(Instruction.Call, GenerateCall);
+ Add(Instruction.Clobber, GenerateClobber);
+ Add(Instruction.Compare, GenerateCompare);
+ Add(Instruction.CompareAndSwap, GenerateCompareAndSwap);
+ Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16);
+ Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8);
+ Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
+ Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32);
+ Add(Instruction.ConvertToFP, GenerateConvertToFP);
+ Add(Instruction.Copy, GenerateCopy);
+ Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
+ Add(Instruction.Divide, GenerateDivide);
+ Add(Instruction.DivideUI, GenerateDivideUI);
+ Add(Instruction.Fill, GenerateFill);
+ Add(Instruction.Load, GenerateLoad);
+ Add(Instruction.Load16, GenerateLoad16);
+ Add(Instruction.Load8, GenerateLoad8);
+ Add(Instruction.MemoryBarrier, GenerateMemoryBarrier);
+ Add(Instruction.Multiply, GenerateMultiply);
+ Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI);
+ Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI);
+ Add(Instruction.Negate, GenerateNegate);
+ Add(Instruction.Return, GenerateReturn);
+ Add(Instruction.RotateRight, GenerateRotateRight);
+ Add(Instruction.ShiftLeft, GenerateShiftLeft);
+ Add(Instruction.ShiftRightSI, GenerateShiftRightSI);
+ Add(Instruction.ShiftRightUI, GenerateShiftRightUI);
+ Add(Instruction.SignExtend16, GenerateSignExtend16);
+ Add(Instruction.SignExtend32, GenerateSignExtend32);
+ Add(Instruction.SignExtend8, GenerateSignExtend8);
+ Add(Instruction.Spill, GenerateSpill);
+ Add(Instruction.SpillArg, GenerateSpillArg);
+ Add(Instruction.StackAlloc, GenerateStackAlloc);
+ Add(Instruction.Store, GenerateStore);
+ Add(Instruction.Store16, GenerateStore16);
+ Add(Instruction.Store8, GenerateStore8);
+ Add(Instruction.Subtract, GenerateSubtract);
+ Add(Instruction.Tailcall, GenerateTailcall);
+ Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
+ Add(Instruction.VectorExtract, GenerateVectorExtract);
+ Add(Instruction.VectorExtract16, GenerateVectorExtract16);
+ Add(Instruction.VectorExtract8, GenerateVectorExtract8);
+ Add(Instruction.VectorInsert, GenerateVectorInsert);
+ Add(Instruction.VectorInsert16, GenerateVectorInsert16);
+ Add(Instruction.VectorInsert8, GenerateVectorInsert8);
+ Add(Instruction.VectorOne, GenerateVectorOne);
+ Add(Instruction.VectorZero, GenerateVectorZero);
+ Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64);
+ Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96);
+ Add(Instruction.ZeroExtend16, GenerateZeroExtend16);
+ Add(Instruction.ZeroExtend32, GenerateZeroExtend32);
+ Add(Instruction.ZeroExtend8, GenerateZeroExtend8);
+#pragma warning restore IDE0055
+
+ static void Add(Instruction inst, Action<CodeGenContext, Operation> func)
+ {
+ _instTable[(int)inst] = func;
+ }
+ }
+
+ public static CompiledFunction Generate(CompilerContext cctx)
+ {
+ ControlFlowGraph cfg = cctx.Cfg;
+
+ Logger.StartPass(PassName.Optimization);
+
+ if (cctx.Options.HasFlag(CompilerOptions.Optimize))
+ {
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Optimizer.RunPass(cfg);
+ }
+
+ BlockPlacement.RunPass(cfg);
+ }
+
+ X86Optimizer.RunPass(cfg);
+
+ Logger.EndPass(PassName.Optimization, cfg);
+
+ Logger.StartPass(PassName.PreAllocation);
+
+ StackAllocator stackAlloc = new();
+
+ PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs);
+
+ Logger.EndPass(PassName.PreAllocation, cfg);
+
+ Logger.StartPass(PassName.RegisterAllocation);
+
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Ssa.Deconstruct(cfg);
+ }
+
+ IRegisterAllocator regAlloc;
+
+ if (cctx.Options.HasFlag(CompilerOptions.Lsra))
+ {
+ regAlloc = new LinearScanAllocator();
+ }
+ else
+ {
+ regAlloc = new HybridAllocator();
+ }
+
+ RegisterMasks regMasks = new(
+ CallingConvention.GetIntAvailableRegisters(),
+ CallingConvention.GetVecAvailableRegisters(),
+ CallingConvention.GetIntCallerSavedRegisters(),
+ CallingConvention.GetVecCallerSavedRegisters(),
+ CallingConvention.GetIntCalleeSavedRegisters(),
+ CallingConvention.GetVecCalleeSavedRegisters(),
+ RegistersCount);
+
+ AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
+
+ Logger.EndPass(PassName.RegisterAllocation, cfg);
+
+ Logger.StartPass(PassName.CodeGeneration);
+
+ bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
+
+ CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);
+
+ UnwindInfo unwindInfo = WritePrologue(context);
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ context.EnterBlock(block);
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ GenerateOperation(context, node);
+ }
+
+ if (block.SuccessorsCount == 0)
+ {
+ // The only blocks which can have 0 successors are exit blocks.
+ Operation last = block.Operations.Last;
+
+ Debug.Assert(last.Instruction == Instruction.Tailcall ||
+ last.Instruction == Instruction.Return);
+ }
+ else
+ {
+ BasicBlock succ = block.GetSuccessor(0);
+
+ if (succ != block.ListNext)
+ {
+ context.JumpTo(succ);
+ }
+ }
+ }
+
+ (byte[] code, RelocInfo relocInfo) = context.Assembler.GetCode();
+
+ Logger.EndPass(PassName.CodeGeneration);
+
+ return new CompiledFunction(code, unwindInfo, relocInfo);
+ }
+
+ private static void GenerateOperation(CodeGenContext context, Operation operation)
+ {
+ if (operation.Instruction == Instruction.Extended)
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ switch (info.Type)
+ {
+ case IntrinsicType.Comis_:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ switch (operation.Intrinsic)
+ {
+ case Intrinsic.X86Comisdeq:
+ context.Assembler.Comisd(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Equal);
+ break;
+
+ case Intrinsic.X86Comisdge:
+ context.Assembler.Comisd(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.AboveOrEqual);
+ break;
+
+ case Intrinsic.X86Comisdlt:
+ context.Assembler.Comisd(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Below);
+ break;
+
+ case Intrinsic.X86Comisseq:
+ context.Assembler.Comiss(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Equal);
+ break;
+
+ case Intrinsic.X86Comissge:
+ context.Assembler.Comiss(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.AboveOrEqual);
+ break;
+
+ case Intrinsic.X86Comisslt:
+ context.Assembler.Comiss(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Below);
+ break;
+ }
+
+ context.Assembler.Movzx8(dest, dest, OperandType.I32);
+
+ break;
+ }
+
+ case IntrinsicType.Mxcsr:
+ {
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+ Debug.Assert(offset.Type == OperandType.I32);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize;
+
+ Operand rsp = Register(X86Register.Rsp);
+ Operand memOp = MemoryOp(OperandType.I32, rsp, default, Multiplier.x1, offs);
+
+ Debug.Assert(HardwareCapabilities.SupportsSse || HardwareCapabilities.SupportsVexEncoding);
+
+ if (operation.Intrinsic == Intrinsic.X86Ldmxcsr)
+ {
+ Operand bits = operation.GetSource(1);
+ Debug.Assert(bits.Type == OperandType.I32);
+
+ context.Assembler.Mov(memOp, bits, OperandType.I32);
+ context.Assembler.Ldmxcsr(memOp);
+ }
+ else if (operation.Intrinsic == Intrinsic.X86Stmxcsr)
+ {
+ Operand dest = operation.Destination;
+ Debug.Assert(dest.Type == OperandType.I32);
+
+ context.Assembler.Stmxcsr(memOp);
+ context.Assembler.Mov(dest, memOp, OperandType.I32);
+ }
+
+ break;
+ }
+
+ case IntrinsicType.PopCount:
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Popcnt(dest, source, dest.Type);
+
+ break;
+ }
+
+ case IntrinsicType.Unary:
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ context.Assembler.WriteInstruction(info.Inst, dest, source);
+
+ break;
+ }
+
+ case IntrinsicType.UnaryToGpr:
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger());
+
+ if (operation.Intrinsic == Intrinsic.X86Cvtsi2si)
+ {
+ if (dest.Type == OperandType.I32)
+ {
+ context.Assembler.Movd(dest, source); // int _mm_cvtsi128_si32(__m128i a)
+ }
+ else /* if (dest.Type == OperandType.I64) */
+ {
+ context.Assembler.Movq(dest, source); // __int64 _mm_cvtsi128_si64(__m128i a)
+ }
+ }
+ else
+ {
+ context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
+ }
+
+ break;
+ }
+
+ case IntrinsicType.Binary:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger());
+ Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
+
+ break;
+ }
+
+ case IntrinsicType.BinaryGpr:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger() && src2.Type.IsInteger());
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src2.Type);
+
+ break;
+ }
+
+ case IntrinsicType.Crc32:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(dest.Type.IsInteger() && src1.Type.IsInteger() && src2.Type.IsInteger());
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src2, dest.Type);
+
+ break;
+ }
+
+ case IntrinsicType.BinaryImm:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte());
+
+ break;
+ }
+
+ case IntrinsicType.Ternary:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(dest, src1, src2, src3);
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ if (info.Inst == X86Instruction.Blendvpd && HardwareCapabilities.SupportsVexEncoding)
+ {
+ context.Assembler.WriteInstruction(X86Instruction.Vblendvpd, dest, src1, src2, src3);
+ }
+ else if (info.Inst == X86Instruction.Blendvps && HardwareCapabilities.SupportsVexEncoding)
+ {
+ context.Assembler.WriteInstruction(X86Instruction.Vblendvps, dest, src1, src2, src3);
+ }
+ else if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding)
+ {
+ context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3);
+ }
+ else
+ {
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src3.GetRegister().Index == 0);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
+ }
+
+ break;
+ }
+
+ case IntrinsicType.TernaryImm:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(dest, src1, src2);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte());
+
+ break;
+ }
+
+ case IntrinsicType.Fma:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
+
+ Debug.Assert(dest.Kind == OperandKind.Register && src1.Kind == OperandKind.Register && src2.Kind == OperandKind.Register);
+ Debug.Assert(src3.Kind == OperandKind.Register || src3.Kind == OperandKind.Memory);
+
+ EnsureSameType(dest, src1, src2, src3);
+ Debug.Assert(dest.Type == OperandType.V128);
+
+ Debug.Assert(dest.Value == src1.Value);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src2, src3);
+
+ break;
+ }
+ }
+ }
+ else
+ {
+ Action func = _instTable[(int)operation.Instruction];
+
+ if (func != null)
+ {
+ func(context, operation);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+ }
+ }
+ }
+
+ private static void GenerateAdd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ if (dest.Type.IsInteger())
+ {
+                // If the destination and source 1 operands are the same, perform a standard add, as there is no benefit to using LEA.
+ if (dest.Kind == src1.Kind && dest.Value == src1.Value)
+ {
+ ValidateBinOp(dest, src1, src2);
+
+ context.Assembler.Add(dest, src2, dest.Type);
+ }
+ else
+ {
+ EnsureSameType(dest, src1, src2);
+
+ int offset;
+ Operand index;
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ offset = src2.AsInt32();
+ index = default;
+ }
+ else
+ {
+ offset = 0;
+ index = src2;
+ }
+
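+                // LEA computes src1 + src2 as an effective address (base + index or
+                // base + displacement), which allows dest to differ from src1 and
+                // leaves the flags untouched.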
+ Operand memOp = MemoryOp(dest.Type, src1, index, Multiplier.x1, offset);
+
+ context.Assembler.Lea(dest, memOp, dest.Type);
+ }
+ }
+ else
+ {
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Addss(dest, src1, src2);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Addsd(dest, src1, src2);
+ }
+ }
+ }
+
+ private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ // Note: GenerateCompareCommon makes the assumption that BitwiseAnd will emit only a single `and`
+ // instruction.
+ context.Assembler.And(dest, src2, dest.Type);
+ }
+
+ private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Xor(dest, src2, dest.Type);
+ }
+ else
+ {
+ context.Assembler.Xorps(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Not(dest);
+ }
+
+ private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Or(dest, src2, dest.Type);
+ }
+
+ private static void GenerateBranchIf(CodeGenContext context, Operation operation)
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToX86Condition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.JumpTo(cond, context.CurrBlock.GetSuccessor(1));
+ }
+
+ private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Bswap(dest);
+ }
+
+ private static void GenerateCall(CodeGenContext context, Operation operation)
+ {
+ context.Assembler.Call(operation.GetSource(0));
+ }
+
+ private static void GenerateClobber(CodeGenContext context, Operation operation)
+ {
+            // This is only used to indicate to the register allocator that a register
+            // is clobbered; we don't need to produce any code.
+ }
+
+ private static void GenerateCompare(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(dest.Type == OperandType.I32);
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToX86Condition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.Assembler.Setcc(dest, cond);
+ context.Assembler.Movzx8(dest, dest, OperandType.I32);
+ }
+
+ private static void GenerateCompareCommon(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(src1, src2);
+
+ Debug.Assert(src1.Type.IsInteger());
+
+ if (src2.Kind == OperandKind.Constant && src2.Value == 0)
+ {
+ if (MatchOperation(operation.ListPrevious, Instruction.BitwiseAnd, src1.Type, src1.GetRegister()))
+ {
+                // Since the `test` and `and` instructions set the status flags in the same way, we can
+                // omit the `test r,r` instruction when it is immediately preceded by an `and r,*` instruction.
+ //
+ // For example:
+ //
+ // and eax, 0x3
+ // test eax, eax
+ // jz .L0
+ //
+ // =>
+ //
+ // and eax, 0x3
+ // jz .L0
+ }
+ else
+ {
+ context.Assembler.Test(src1, src1, src1.Type);
+ }
+ }
+ else
+ {
+ context.Assembler.Cmp(src1, src2, src1.Type);
+ }
+ }
+
+ private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+
+ if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3.
+ {
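+                // cmpxchg16b implicitly compares rdx:rax against the 16 bytes at the
+                // given address and stores rcx:rbx on a match; only the memory operand
+                // is encoded explicitly here.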
+ Operand memOp = MemoryOp(OperandType.I64, src1);
+
+ context.Assembler.Cmpxchg16b(memOp);
+ }
+ else
+ {
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(src2, src3);
+
+ Operand memOp = MemoryOp(src3.Type, src1);
+
+ context.Assembler.Cmpxchg(memOp, src3);
+ }
+ }
+
+ private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(src2, src3);
+
+ Operand memOp = MemoryOp(src3.Type, src1);
+
+ context.Assembler.Cmpxchg16(memOp, src3);
+ }
+
+ private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(src2, src3);
+
+ Operand memOp = MemoryOp(src3.Type, src1);
+
+ context.Assembler.Cmpxchg8(memOp, src3);
+ }
+
+ private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameReg(dest, src3);
+ EnsureSameType(dest, src2, src3);
+
+ Debug.Assert(dest.Type.IsInteger());
+ Debug.Assert(src1.Type == OperandType.I32);
+
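+            // dest already aliases src3 (the value for the false case), so cmovne only
+            // overwrites it with src2 when src1 is non-zero.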
+ context.Assembler.Test(src1, src1, src1.Type);
+ context.Assembler.Cmovcc(dest, src2, dest.Type, X86Condition.NotEqual);
+ }
+
+ private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64);
+
+ context.Assembler.Mov(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64);
+
+ if (dest.Type == OperandType.FP32)
+ {
+ Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP64);
+
+ if (source.Type.IsInteger())
+ {
+ context.Assembler.Xorps(dest, dest, dest);
+ context.Assembler.Cvtsi2ss(dest, dest, source, source.Type);
+ }
+ else /* if (source.Type == OperandType.FP64) */
+ {
+ context.Assembler.Cvtsd2ss(dest, dest, source);
+
+ GenerateZeroUpper96(context, dest, dest);
+ }
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP32);
+
+ if (source.Type.IsInteger())
+ {
+ context.Assembler.Xorps(dest, dest, dest);
+ context.Assembler.Cvtsi2sd(dest, dest, source, source.Type);
+ }
+ else /* if (source.Type == OperandType.FP32) */
+ {
+ context.Assembler.Cvtss2sd(dest, dest, source);
+
+ GenerateZeroUpper64(context, dest, dest);
+ }
+ }
+ }
+
+ private static void GenerateCopy(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant);
+
+ // Moves to the same register are useless.
+ if (dest.Kind == source.Kind && dest.Value == source.Value)
+ {
+ return;
+ }
+
+ if (dest.Kind == OperandKind.Register &&
+ source.Kind == OperandKind.Constant && source.Value == 0)
+ {
+                // Assemble "mov reg, 0" as "xor reg, reg", as the latter is more efficient.
+ context.Assembler.Xor(dest, dest, OperandType.I32);
+ }
+ else if (dest.Type.IsInteger())
+ {
+ context.Assembler.Mov(dest, source, dest.Type);
+ }
+ else
+ {
+ context.Assembler.Movdqu(dest, source);
+ }
+ }
+
+ private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Bsr(dest, source, dest.Type);
+
+ int operandSize = dest.Type == OperandType.I32 ? 32 : 64;
+ int operandMask = operandSize - 1;
+
+            // When the input operand is 0, the result is undefined, but the ZF flag is
+            // set. We are supposed to return the operand size in that case, so add a
+            // conditional jump to handle it: when the input was zero, move
+            // (operandSize | operandMask) into the destination register so that the
+            // final XOR below yields the operand size.
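+            //
+            // For a 32-bit input, the emitted sequence looks roughly like this
+            // (register names are illustrative):
+            //
+            //   bsr eax, ecx     ; ZF is set when ecx == 0
+            //   jne .L0
+            //   mov eax, 63      ; operandSize | operandMask
+            // .L0:
+            //   xor eax, 31      ; operandMask; 63 ^ 31 == 32 for the zero case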
+ Operand neLabel = Label();
+
+ context.Assembler.Jcc(X86Condition.NotEqual, neLabel);
+
+ context.Assembler.Mov(dest, Const(operandSize | operandMask), OperandType.I32);
+
+ context.Assembler.MarkLabel(neLabel);
+
+            // BSR returns the zero-based index of the highest set bit, counted from
+            // the least significant bit. However, we are supposed to return the number
+            // of 0 bits on the high end, so we invert the result of the BSR using XOR
+            // to get the correct value.
+ context.Assembler.Xor(dest, Const(operandMask), OperandType.I32);
+ }
+
+ private static void GenerateDivide(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand dividend = operation.GetSource(0);
+ Operand divisor = operation.GetSource(1);
+
+ if (!dest.Type.IsInteger())
+ {
+ ValidateBinOp(dest, dividend, divisor);
+ }
+
+ if (dest.Type.IsInteger())
+ {
+ divisor = operation.GetSource(2);
+
+ EnsureSameType(dest, divisor);
+
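+            // idiv divides rdx:rax (or edx:eax) by its operand, so the dividend's sign
+            // must first be extended into the high half: cdq for 32-bit, cqo for 64-bit.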
+ if (divisor.Type == OperandType.I32)
+ {
+ context.Assembler.Cdq();
+ }
+ else
+ {
+ context.Assembler.Cqo();
+ }
+
+ context.Assembler.Idiv(divisor);
+ }
+ else if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Divss(dest, dividend, divisor);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Divsd(dest, dividend, divisor);
+ }
+ }
+
+ private static void GenerateDivideUI(CodeGenContext context, Operation operation)
+ {
+ Operand divisor = operation.GetSource(2);
+
+ Operand rdx = Register(X86Register.Rdx);
+
+ Debug.Assert(divisor.Type.IsInteger());
+
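+            // Unsigned div takes rdx:rax (or edx:eax) as the dividend, so zero the
+            // high half to make only rax contribute.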
+ context.Assembler.Xor(rdx, rdx, OperandType.I32);
+ context.Assembler.Div(divisor);
+ }
+
+ private static void GenerateFill(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize;
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ Operand memOp = MemoryOp(dest.Type, rsp, default, Multiplier.x1, offs);
+
+ GenerateLoad(context, memOp, dest);
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ GenerateLoad(context, address, value);
+ }
+
+ private static void GenerateLoad16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Movzx16(value, address, value.Type);
+ }
+
+ private static void GenerateLoad8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Movzx8(value, address, value.Type);
+ }
+
+ private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation)
+ {
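+            // A locked read-modify-write on the stack ("lock or dword [rsp], 0") acts
+            // as a full memory barrier and is generally cheaper than mfence.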
+ context.Assembler.LockOr(MemoryOp(OperandType.I64, Register(X86Register.Rsp)), Const(0), OperandType.I32);
+ }
+
+ private static void GenerateMultiply(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ if (src2.Kind != OperandKind.Constant)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ EnsureSameType(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ if (src2.Kind == OperandKind.Constant)
+ {
+ context.Assembler.Imul(dest, src1, src2, dest.Type);
+ }
+ else
+ {
+ context.Assembler.Imul(dest, src2, dest.Type);
+ }
+ }
+ else if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Mulss(dest, src1, src2);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Mulsd(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
+ {
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(source.Type == OperandType.I64);
+
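+            // The one-operand imul computes rdx:rax = rax * source, leaving the high
+            // 64 bits of the product in rdx.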
+ context.Assembler.Imul(source);
+ }
+
+ private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
+ {
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(source.Type == OperandType.I64);
+
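+            // The one-operand mul computes rdx:rax = rax * source, leaving the high
+            // 64 bits of the product in rdx.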
+ context.Assembler.Mul(source);
+ }
+
+ private static void GenerateNegate(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Neg(dest);
+ }
+
+ private static void GenerateReturn(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Return();
+ }
+
+ private static void GenerateRotateRight(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Ror(dest, src2, dest.Type);
+ }
+
+ private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Shl(dest, src2, dest.Type);
+ }
+
+ private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Sar(dest, src2, dest.Type);
+ }
+
+ private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Shr(dest, src2, dest.Type);
+ }
+
+ private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movsx16(dest, source, dest.Type);
+ }
+
+ private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movsx32(dest, source, dest.Type);
+ }
+
+ private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movsx8(dest, source, dest.Type);
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, context.CallArgsRegionSize);
+ }
+
+ private static void GenerateSpillArg(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, 0);
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
+ {
+ Operand offset = operation.GetSource(0);
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + baseOffset;
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ Operand memOp = MemoryOp(source.Type, rsp, default, Multiplier.x1, offs);
+
+ GenerateStore(context, memOp, source);
+ }
+
+ private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize;
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ Operand memOp = MemoryOp(OperandType.I64, rsp, default, Multiplier.x1, offs);
+
+ context.Assembler.Lea(dest, memOp, OperandType.I64);
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ GenerateStore(context, address, value);
+ }
+
+ private static void GenerateStore16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Mov16(address, value);
+ }
+
+ private static void GenerateStore8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Mov8(address, value);
+ }
+
+ private static void GenerateSubtract(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Sub(dest, src2, dest.Type);
+ }
+ else if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Subss(dest, src1, src2);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Subsd(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateTailcall(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Jmp(operation.GetSource(0));
+ }
+
+ private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger());
+
+ if (source.Type == OperandType.I32)
+ {
+ context.Assembler.Movd(dest, source); // (__m128i _mm_cvtsi32_si128(int a))
+ }
+ else /* if (source.Type == OperandType.I64) */
+ {
+ context.Assembler.Movq(dest, source); // (__m128i _mm_cvtsi64_si128(__int64 a))
+ }
+ }
+
+ private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+ {
+            Operand dest = operation.Destination; // Value
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes());
+
+ if (dest.Type == OperandType.I32)
+ {
+ if (index == 0)
+ {
+ context.Assembler.Movd(dest, src1);
+ }
+ else if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pextrd(dest, src1, index);
+ }
+ else
+ {
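+                // Without SSE4.1 there is no pextrd: rotate the requested lane into
+                // position 0 with pshufd, extract it with movd, then apply the inverse
+                // shuffle to restore src1.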
+ int mask0 = 0b11_10_01_00;
+ int mask1 = 0b11_10_01_00;
+
+ mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
+ mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
+
+ context.Assembler.Pshufd(src1, src1, (byte)mask0);
+ context.Assembler.Movd(dest, src1);
+ context.Assembler.Pshufd(src1, src1, (byte)mask1);
+ }
+ }
+ else if (dest.Type == OperandType.I64)
+ {
+ if (index == 0)
+ {
+ context.Assembler.Movq(dest, src1);
+ }
+ else if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pextrq(dest, src1, index);
+ }
+ else
+ {
+ const byte Mask = 0b01_00_11_10;
+
+ context.Assembler.Pshufd(src1, src1, Mask);
+ context.Assembler.Movq(dest, src1);
+ context.Assembler.Pshufd(src1, src1, Mask);
+ }
+ }
+ else
+ {
+ // Floating-point types.
+ if ((index >= 2 && dest.Type == OperandType.FP32) ||
+ (index == 1 && dest.Type == OperandType.FP64))
+ {
+ context.Assembler.Movhlps(dest, dest, src1);
+ context.Assembler.Movq(dest, dest);
+ }
+ else
+ {
+ context.Assembler.Movq(dest, src1);
+ }
+
+ if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Pshufd(dest, dest, (byte)(0xfc | (index & 1)));
+ }
+ }
+ }
+
+ private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
+ {
+            Operand dest = operation.Destination; // Value
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 8);
+
+ context.Assembler.Pextrw(dest, src1, index);
+ }
+
+ private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
+ {
+            Operand dest = operation.Destination; // Value
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 16);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pextrb(dest, src1, index);
+ }
+ else
+ {
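+            // Without SSE4.1 there is no pextrb: extract the 16-bit word containing
+            // the byte, then isolate it with a right shift (odd index) or a
+            // zero-extension (even index).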
+ context.Assembler.Pextrw(dest, src1, (byte)(index >> 1));
+
+ if ((index & 1) != 0)
+ {
+ context.Assembler.Shr(dest, Const(8), OperandType.I32);
+ }
+ else
+ {
+ context.Assembler.Movzx8(dest, dest, OperandType.I32);
+ }
+ }
+ }
+
+ private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ void InsertIntSse2(int words)
+ {
+ if (dest.GetRegister() != src1.GetRegister())
+ {
+ context.Assembler.Movdqu(dest, src1);
+ }
+
+ for (int word = 0; word < words; word++)
+ {
+ // Insert lower 16-bits.
+ context.Assembler.Pinsrw(dest, dest, src2, (byte)(index * words + word));
+
+ // Move next word down.
+ context.Assembler.Ror(src2, Const(16), src2.Type);
+ }
+ }
+
+ if (src2.Type == OperandType.I32)
+ {
+ Debug.Assert(index < 4);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pinsrd(dest, src1, src2, index);
+ }
+ else
+ {
+ InsertIntSse2(2);
+ }
+ }
+ else if (src2.Type == OperandType.I64)
+ {
+ Debug.Assert(index < 2);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pinsrq(dest, src1, src2, index);
+ }
+ else
+ {
+ InsertIntSse2(4);
+ }
+ }
+ else if (src2.Type == OperandType.FP32)
+ {
+ Debug.Assert(index < 4);
+
+ if (index != 0)
+ {
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Insertps(dest, src1, src2, (byte)(index << 4));
+ }
+ else
+ {
+ if (src1.GetRegister() == src2.GetRegister())
+ {
+ int mask = 0b11_10_01_00;
+
+ mask &= ~(0b11 << index * 2);
+
+ context.Assembler.Pshufd(dest, src1, (byte)mask);
+ }
+ else
+ {
+ int mask0 = 0b11_10_01_00;
+ int mask1 = 0b11_10_01_00;
+
+ mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
+ mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
+
+                        context.Assembler.Pshufd(src1, src1, (byte)mask0); // Rotate the target lane into position 0.
+                        context.Assembler.Movss(dest, src1, src2); // dest[127:0] = src1[127:32] | src2[31:0]
+                        context.Assembler.Pshufd(dest, dest, (byte)mask1); // Rotate the inserted lane back to its original position.
+
+ if (dest.GetRegister() != src1.GetRegister())
+ {
+ context.Assembler.Pshufd(src1, src1, (byte)mask1); // Restore src1.
+ }
+ }
+ }
+ }
+ else
+ {
+ context.Assembler.Movss(dest, src1, src2);
+ }
+ }
+ else /* if (src2.Type == OperandType.FP64) */
+ {
+ Debug.Assert(index < 2);
+
+ if (index != 0)
+ {
+ context.Assembler.Movlhps(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.Movsd(dest, src1, src2);
+ }
+ }
+ }
+
+ private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Pinsrw(dest, src1, src2, index);
+ }
+
+ private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+            // Without SSE 4.1 support, it's not possible to emulate this instruction
+            // here without a temporary register, so that case is instead handled in
+            // the pre-allocator when SSE 4.1 is not supported on the CPU.
+ Debug.Assert(HardwareCapabilities.SupportsSse41);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Pinsrb(dest, src1, src2, index);
+ }
+
+ private static void GenerateVectorOne(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
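+            // Comparing a register with itself makes every element compare equal,
+            // which sets all bits of the destination to 1.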
+ context.Assembler.Pcmpeqw(dest, dest, dest);
+ }
+
+ private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ context.Assembler.Xorps(dest, dest, dest);
+ }
+
+ private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+ GenerateZeroUpper64(context, dest, source);
+ }
+
+ private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+ GenerateZeroUpper96(context, dest, source);
+ }
+
+ private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movzx16(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+            // We can eliminate the move if the source is already 32-bit and the registers are the same.
+ if (dest.Value == source.Value && source.Type == OperandType.I32)
+ {
+ return;
+ }
+
+ context.Assembler.Mov(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movzx8(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operand address, Operand value)
+ {
+ switch (value.Type)
+ {
+ case OperandType.I32:
+ context.Assembler.Mov(value, address, OperandType.I32);
+ break;
+ case OperandType.I64:
+ context.Assembler.Mov(value, address, OperandType.I64);
+ break;
+ case OperandType.FP32:
+ context.Assembler.Movd(value, address);
+ break;
+ case OperandType.FP64:
+ context.Assembler.Movq(value, address);
+ break;
+ case OperandType.V128:
+ context.Assembler.Movdqu(value, address);
+ break;
+
+ default:
+ Debug.Assert(false);
+ break;
+ }
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operand address, Operand value)
+ {
+ switch (value.Type)
+ {
+ case OperandType.I32:
+ context.Assembler.Mov(address, value, OperandType.I32);
+ break;
+ case OperandType.I64:
+ context.Assembler.Mov(address, value, OperandType.I64);
+ break;
+ case OperandType.FP32:
+ context.Assembler.Movd(address, value);
+ break;
+ case OperandType.FP64:
+ context.Assembler.Movq(address, value);
+ break;
+ case OperandType.V128:
+ context.Assembler.Movdqu(address, value);
+ break;
+
+ default:
+ Debug.Assert(false);
+ break;
+ }
+ }
+
+ private static void GenerateZeroUpper64(CodeGenContext context, Operand dest, Operand source)
+ {
+ context.Assembler.Movq(dest, source);
+ }
+
+ private static void GenerateZeroUpper96(CodeGenContext context, Operand dest, Operand source)
+ {
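+            // movq zeroes bits 127:64; pshufd with 0xfc (lane selector 0,3,3,3) then
+            // copies the zeroed top lane into lane 1, clearing bits 63:32 as well.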
+ context.Assembler.Movq(dest, source);
+ context.Assembler.Pshufd(dest, dest, 0xfc);
+ }
+
+ private static bool MatchOperation(Operation node, Instruction inst, OperandType destType, Register destReg)
+ {
+ if (node == default || node.DestinationsCount == 0)
+ {
+ return false;
+ }
+
+ if (node.Instruction != inst)
+ {
+ return false;
+ }
+
+ Operand dest = node.Destination;
+
+ return dest.Kind == OperandKind.Register &&
+ dest.Type == destType &&
+ dest.GetRegister() == destReg;
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateUnOp(Operand dest, Operand source)
+ {
+ EnsureSameReg(dest, source);
+ EnsureSameType(dest, source);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+ {
+ EnsureSameReg(dest, src1);
+ EnsureSameType(dest, src1, src2);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+ {
+ EnsureSameReg(dest, src1);
+ EnsureSameType(dest, src1);
+
+ Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
+ }
+
+ private static void EnsureSameReg(Operand op1, Operand op2)
+ {
+ if (!op1.Type.IsInteger() && HardwareCapabilities.SupportsVexEncoding)
+ {
+ return;
+ }
+
+ Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+ Debug.Assert(op1.Kind == op2.Kind);
+ Debug.Assert(op1.Value == op2.Value);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ Debug.Assert(op1.Type == op4.Type);
+ }
+
+ private static UnwindInfo WritePrologue(CodeGenContext context)
+ {
+            List<UnwindPushEntry> pushEntries = new();
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+ while (mask != 0)
+ {
+ int bit = BitOperations.TrailingZeroCount(mask);
+
+ context.Assembler.Push(Register((X86Register)bit));
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: bit));
+
+ mask &= ~(1 << bit);
+ }
+
+ int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
+
+ reservedStackSize += context.XmmSaveRegionSize;
+
+ if (reservedStackSize >= StackGuardSize)
+ {
+ GenerateInlineStackProbe(context, reservedStackSize);
+ }
+
+ if (reservedStackSize != 0)
+ {
+ context.Assembler.Sub(rsp, Const(reservedStackSize), OperandType.I64);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.AllocStack, context.StreamOffset, stackOffsetOrAllocSize: reservedStackSize));
+ }
+
+ int offset = reservedStackSize;
+
+ mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ while (mask != 0)
+ {
+ int bit = BitOperations.TrailingZeroCount(mask);
+
+ offset -= 16;
+
+ Operand memOp = MemoryOp(OperandType.V128, rsp, default, Multiplier.x1, offset);
+
+ context.Assembler.Movdqu(memOp, Xmm((X86Register)bit));
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.SaveXmm128, context.StreamOffset, bit, offset));
+
+ mask &= ~(1 << bit);
+ }
+
+ return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset);
+ }
+
+ private static void WriteEpilogue(CodeGenContext context)
+ {
+ Operand rsp = Register(X86Register.Rsp);
+
+ int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
+
+ reservedStackSize += context.XmmSaveRegionSize;
+
+ int offset = reservedStackSize;
+
+ int mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ while (mask != 0)
+ {
+ int bit = BitOperations.TrailingZeroCount(mask);
+
+ offset -= 16;
+
+ Operand memOp = MemoryOp(OperandType.V128, rsp, default, Multiplier.x1, offset);
+
+ context.Assembler.Movdqu(Xmm((X86Register)bit), memOp);
+
+ mask &= ~(1 << bit);
+ }
+
+ if (reservedStackSize != 0)
+ {
+ context.Assembler.Add(rsp, Const(reservedStackSize), OperandType.I64);
+ }
+
+ mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
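+            // The prologue pushes callee saved registers from the lowest index up, so
+            // pop from the highest index down to restore them in reverse order.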
+ while (mask != 0)
+ {
+ int bit = BitUtils.HighestBitSet(mask);
+
+ context.Assembler.Pop(Register((X86Register)bit));
+
+ mask &= ~(1 << bit);
+ }
+ }
+
+ private static void GenerateInlineStackProbe(CodeGenContext context, int size)
+ {
+            // Windows does lazy stack allocation, and there are just 2
+            // guard pages at the end of the stack. So, if the allocation
+            // size we make is greater than this guard size, we must ensure
+            // that the OS will map all pages that we'll use. We do that by
+            // doing a dummy read on those pages, forcing a page fault so
+            // the OS maps them. If they are already mapped, nothing happens.
+ const int PageMask = PageSize - 1;
+
+ size = (size + PageMask) & ~PageMask;
+
+ Operand rsp = Register(X86Register.Rsp);
+ Operand temp = Register(CallingConvention.GetIntReturnRegister());
+
+ for (int offset = PageSize; offset < size; offset += PageSize)
+ {
+ Operand memOp = MemoryOp(OperandType.I32, rsp, default, Multiplier.x1, -offset);
+
+ context.Assembler.Mov(temp, memOp, OperandType.I32);
+ }
+ }
+
+ private static Operand Memory(Operand operand, OperandType type)
+ {
+ if (operand.Kind == OperandKind.Memory)
+ {
+ return operand;
+ }
+
+ return MemoryOp(type, operand);
+ }
+
+ private static Operand Register(X86Register register, OperandType type = OperandType.I64)
+ {
+ return Operand.Factory.Register((int)register, RegisterType.Integer, type);
+ }
+
+ private static Operand Xmm(X86Register register)
+ {
+ return Operand.Factory.Register((int)register, RegisterType.Vector, OperandType.V128);
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
new file mode 100644
index 0000000..4f6f1e8
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -0,0 +1,144 @@
+using Ryujinx.Memory;
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class HardwareCapabilities
+ {
+ private delegate uint GetXcr0();
+
+ static HardwareCapabilities()
+ {
+ if (!X86Base.IsSupported)
+ {
+ return;
+ }
+
+ (int maxNum, _, _, _) = X86Base.CpuId(0x00000000, 0x00000000);
+
+ (_, _, int ecx1, int edx1) = X86Base.CpuId(0x00000001, 0x00000000);
+ FeatureInfo1Edx = (FeatureFlags1Edx)edx1;
+ FeatureInfo1Ecx = (FeatureFlags1Ecx)ecx1;
+
+ if (maxNum >= 7)
+ {
+ (_, int ebx7, int ecx7, _) = X86Base.CpuId(0x00000007, 0x00000000);
+ FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
+ FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7;
+ }
+
+ Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax();
+ }
+
+ private static uint GetXcr0Eax()
+ {
+ if (!FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave))
+ {
+ // XSAVE feature required for xgetbv
+ return 0;
+ }
+
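+            // xgetbv with ecx = 0 returns the XCR0 register in edx:eax; only the low
+            // 32 bits are needed for the feature bits checked by this class.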
+            ReadOnlySpan<byte> asmGetXcr0 = new byte[]
+ {
+ 0x31, 0xc9, // xor ecx, ecx
+                0x0f, 0x01, 0xd0, // xgetbv
+ 0xc3, // ret
+ };
+
+ using MemoryBlock memGetXcr0 = new((ulong)asmGetXcr0.Length);
+
+ memGetXcr0.Write(0, asmGetXcr0);
+
+ memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute);
+
+            var fGetXcr0 = Marshal.GetDelegateForFunctionPointer<GetXcr0>(memGetXcr0.Pointer);
+
+ return fGetXcr0();
+ }
+
+ [Flags]
+ public enum FeatureFlags1Edx
+ {
+ Sse = 1 << 25,
+ Sse2 = 1 << 26,
+ }
+
+ [Flags]
+ public enum FeatureFlags1Ecx
+ {
+ Sse3 = 1 << 0,
+ Pclmulqdq = 1 << 1,
+ Ssse3 = 1 << 9,
+ Fma = 1 << 12,
+ Sse41 = 1 << 19,
+ Sse42 = 1 << 20,
+ Popcnt = 1 << 23,
+ Aes = 1 << 25,
+ Xsave = 1 << 26,
+ Osxsave = 1 << 27,
+ Avx = 1 << 28,
+ F16c = 1 << 29,
+ }
+
+ [Flags]
+ public enum FeatureFlags7Ebx
+ {
+ Avx2 = 1 << 5,
+ Avx512f = 1 << 16,
+ Avx512dq = 1 << 17,
+ Sha = 1 << 29,
+ Avx512bw = 1 << 30,
+ Avx512vl = 1 << 31,
+ }
+
+ [Flags]
+ public enum FeatureFlags7Ecx
+ {
+ Gfni = 1 << 8,
+ }
+
+ [Flags]
+ public enum Xcr0FlagsEax
+ {
+ Sse = 1 << 1,
+ YmmHi128 = 1 << 2,
+ Opmask = 1 << 5,
+ ZmmHi256 = 1 << 6,
+ Hi16Zmm = 1 << 7,
+ }
+
+ public static FeatureFlags1Edx FeatureInfo1Edx { get; }
+ public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
+ public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
+ public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0;
+ public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0;
+
+ public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
+ public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
+ public static bool SupportsSse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse3);
+ public static bool SupportsPclmulqdq => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Pclmulqdq);
+ public static bool SupportsSsse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Ssse3);
+ public static bool SupportsFma => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Fma);
+ public static bool SupportsSse41 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse41);
+ public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
+ public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
+ public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);
+ public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128);
+ public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
+ public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave)
+ && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm);
+ public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F;
+ public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F;
+ public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F;
+ public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
+ public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
+ public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni);
+
+ public static bool ForceLegacySse { get; set; }
+
+ public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse;
+ public static bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse;
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
new file mode 100644
index 0000000..16054c6
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ readonly struct IntrinsicInfo
+ {
+ public X86Instruction Inst { get; }
+ public IntrinsicType Type { get; }
+
+ public IntrinsicInfo(X86Instruction inst, IntrinsicType type)
+ {
+ Inst = inst;
+ Type = type;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
new file mode 100644
index 0000000..daa1f8f
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -0,0 +1,202 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class IntrinsicTable
+ {
+ private static readonly IntrinsicInfo[] _intrinTable;
+
+ static IntrinsicTable()
+ {
+ _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
+
+#pragma warning disable IDE0055 // Disable formatting
+ Add(Intrinsic.X86Addpd, new IntrinsicInfo(X86Instruction.Addpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addps, new IntrinsicInfo(X86Instruction.Addps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addsd, new IntrinsicInfo(X86Instruction.Addsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addss, new IntrinsicInfo(X86Instruction.Addss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesdec, new IntrinsicInfo(X86Instruction.Aesdec, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesdeclast, new IntrinsicInfo(X86Instruction.Aesdeclast, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesenc, new IntrinsicInfo(X86Instruction.Aesenc, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesenclast, new IntrinsicInfo(X86Instruction.Aesenclast, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesimc, new IntrinsicInfo(X86Instruction.Aesimc, IntrinsicType.Unary));
+ Add(Intrinsic.X86Andnpd, new IntrinsicInfo(X86Instruction.Andnpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Andnps, new IntrinsicInfo(X86Instruction.Andnps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Andpd, new IntrinsicInfo(X86Instruction.Andpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Andps, new IntrinsicInfo(X86Instruction.Andps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Blendvpd, new IntrinsicInfo(X86Instruction.Blendvpd, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Blendvps, new IntrinsicInfo(X86Instruction.Blendvps, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Cmppd, new IntrinsicInfo(X86Instruction.Cmppd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpps, new IntrinsicInfo(X86Instruction.Cmpps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpsd, new IntrinsicInfo(X86Instruction.Cmpsd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpss, new IntrinsicInfo(X86Instruction.Cmpss, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Comisdeq, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisdge, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisdlt, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisseq, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comissge, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisslt, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Crc32, new IntrinsicInfo(X86Instruction.Crc32, IntrinsicType.Crc32));
+ Add(Intrinsic.X86Crc32_16, new IntrinsicInfo(X86Instruction.Crc32_16, IntrinsicType.Crc32));
+ Add(Intrinsic.X86Crc32_8, new IntrinsicInfo(X86Instruction.Crc32_8, IntrinsicType.Crc32));
+ Add(Intrinsic.X86Cvtdq2pd, new IntrinsicInfo(X86Instruction.Cvtdq2pd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtdq2ps, new IntrinsicInfo(X86Instruction.Cvtdq2ps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtpd2dq, new IntrinsicInfo(X86Instruction.Cvtpd2dq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtpd2ps, new IntrinsicInfo(X86Instruction.Cvtpd2ps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtps2dq, new IntrinsicInfo(X86Instruction.Cvtps2dq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr));
+ Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Cvtsi2sd, new IntrinsicInfo(X86Instruction.Cvtsi2sd, IntrinsicType.BinaryGpr));
+ Add(Intrinsic.X86Cvtsi2si, new IntrinsicInfo(X86Instruction.Movd, IntrinsicType.UnaryToGpr));
+ Add(Intrinsic.X86Cvtsi2ss, new IntrinsicInfo(X86Instruction.Cvtsi2ss, IntrinsicType.BinaryGpr));
+ Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Cvtss2si, new IntrinsicInfo(X86Instruction.Cvtss2si, IntrinsicType.UnaryToGpr));
+ Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divsd, new IntrinsicInfo(X86Instruction.Divsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divss, new IntrinsicInfo(X86Instruction.Divss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Gf2p8affineqb, new IntrinsicInfo(X86Instruction.Gf2p8affineqb, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Haddpd, new IntrinsicInfo(X86Instruction.Haddpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Haddps, new IntrinsicInfo(X86Instruction.Haddps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Insertps, new IntrinsicInfo(X86Instruction.Insertps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Ldmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr));
+ Add(Intrinsic.X86Maxpd, new IntrinsicInfo(X86Instruction.Maxpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxps, new IntrinsicInfo(X86Instruction.Maxps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxsd, new IntrinsicInfo(X86Instruction.Maxsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxss, new IntrinsicInfo(X86Instruction.Maxss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minpd, new IntrinsicInfo(X86Instruction.Minpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minps, new IntrinsicInfo(X86Instruction.Minps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minsd, new IntrinsicInfo(X86Instruction.Minsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minss, new IntrinsicInfo(X86Instruction.Minss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Movhlps, new IntrinsicInfo(X86Instruction.Movhlps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Movlhps, new IntrinsicInfo(X86Instruction.Movlhps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Movss, new IntrinsicInfo(X86Instruction.Movss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulpd, new IntrinsicInfo(X86Instruction.Mulpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Palignr, new IntrinsicInfo(X86Instruction.Palignr, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pavgw, new IntrinsicInfo(X86Instruction.Pavgw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pblendvb, new IntrinsicInfo(X86Instruction.Pblendvb, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Pclmulqdq, new IntrinsicInfo(X86Instruction.Pclmulqdq, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Pcmpeqb, new IntrinsicInfo(X86Instruction.Pcmpeqb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqd, new IntrinsicInfo(X86Instruction.Pcmpeqd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqq, new IntrinsicInfo(X86Instruction.Pcmpeqq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqw, new IntrinsicInfo(X86Instruction.Pcmpeqw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtb, new IntrinsicInfo(X86Instruction.Pcmpgtb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtd, new IntrinsicInfo(X86Instruction.Pcmpgtd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtq, new IntrinsicInfo(X86Instruction.Pcmpgtq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtw, new IntrinsicInfo(X86Instruction.Pcmpgtw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsb, new IntrinsicInfo(X86Instruction.Pmaxsb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsd, new IntrinsicInfo(X86Instruction.Pmaxsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsw, new IntrinsicInfo(X86Instruction.Pmaxsw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxub, new IntrinsicInfo(X86Instruction.Pmaxub, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxud, new IntrinsicInfo(X86Instruction.Pmaxud, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxuw, new IntrinsicInfo(X86Instruction.Pmaxuw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsb, new IntrinsicInfo(X86Instruction.Pminsb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsd, new IntrinsicInfo(X86Instruction.Pminsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsw, new IntrinsicInfo(X86Instruction.Pminsw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminub, new IntrinsicInfo(X86Instruction.Pminub, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminud, new IntrinsicInfo(X86Instruction.Pminud, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminuw, new IntrinsicInfo(X86Instruction.Pminuw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmovsxbw, new IntrinsicInfo(X86Instruction.Pmovsxbw, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovsxdq, new IntrinsicInfo(X86Instruction.Pmovsxdq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovsxwd, new IntrinsicInfo(X86Instruction.Pmovsxwd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxbw, new IntrinsicInfo(X86Instruction.Pmovzxbw, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxdq, new IntrinsicInfo(X86Instruction.Pmovzxdq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxwd, new IntrinsicInfo(X86Instruction.Pmovzxwd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmulld, new IntrinsicInfo(X86Instruction.Pmulld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmullw, new IntrinsicInfo(X86Instruction.Pmullw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Popcnt, new IntrinsicInfo(X86Instruction.Popcnt, IntrinsicType.PopCount));
+ Add(Intrinsic.X86Por, new IntrinsicInfo(X86Instruction.Por, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pshufb, new IntrinsicInfo(X86Instruction.Pshufb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pshufd, new IntrinsicInfo(X86Instruction.Pshufd, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Pslld, new IntrinsicInfo(X86Instruction.Pslld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pslldq, new IntrinsicInfo(X86Instruction.Pslldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psllq, new IntrinsicInfo(X86Instruction.Psllq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psllw, new IntrinsicInfo(X86Instruction.Psllw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrad, new IntrinsicInfo(X86Instruction.Psrad, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psraw, new IntrinsicInfo(X86Instruction.Psraw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrld, new IntrinsicInfo(X86Instruction.Psrld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrlq, new IntrinsicInfo(X86Instruction.Psrlq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrldq, new IntrinsicInfo(X86Instruction.Psrldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrlw, new IntrinsicInfo(X86Instruction.Psrlw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubb, new IntrinsicInfo(X86Instruction.Psubb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubd, new IntrinsicInfo(X86Instruction.Psubd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubq, new IntrinsicInfo(X86Instruction.Psubq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubw, new IntrinsicInfo(X86Instruction.Psubw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhbw, new IntrinsicInfo(X86Instruction.Punpckhbw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhdq, new IntrinsicInfo(X86Instruction.Punpckhdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhqdq, new IntrinsicInfo(X86Instruction.Punpckhqdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhwd, new IntrinsicInfo(X86Instruction.Punpckhwd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklbw, new IntrinsicInfo(X86Instruction.Punpcklbw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckldq, new IntrinsicInfo(X86Instruction.Punpckldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklqdq, new IntrinsicInfo(X86Instruction.Punpcklqdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklwd, new IntrinsicInfo(X86Instruction.Punpcklwd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pxor, new IntrinsicInfo(X86Instruction.Pxor, IntrinsicType.Binary));
+ Add(Intrinsic.X86Rcpps, new IntrinsicInfo(X86Instruction.Rcpps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Rcpss, new IntrinsicInfo(X86Instruction.Rcpss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Roundpd, new IntrinsicInfo(X86Instruction.Roundpd, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundps, new IntrinsicInfo(X86Instruction.Roundps, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundsd, new IntrinsicInfo(X86Instruction.Roundsd, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sha256Msg1, new IntrinsicInfo(X86Instruction.Sha256Msg1, IntrinsicType.Binary));
+ Add(Intrinsic.X86Sha256Msg2, new IntrinsicInfo(X86Instruction.Sha256Msg2, IntrinsicType.Binary));
+ Add(Intrinsic.X86Sha256Rnds2, new IntrinsicInfo(X86Instruction.Sha256Rnds2, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtps, new IntrinsicInfo(X86Instruction.Sqrtps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtsd, new IntrinsicInfo(X86Instruction.Sqrtsd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtss, new IntrinsicInfo(X86Instruction.Sqrtss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Stmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr));
+ Add(Intrinsic.X86Subpd, new IntrinsicInfo(X86Instruction.Subpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subps, new IntrinsicInfo(X86Instruction.Subps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subsd, new IntrinsicInfo(X86Instruction.Subsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subss, new IntrinsicInfo(X86Instruction.Subss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpckhpd, new IntrinsicInfo(X86Instruction.Unpckhpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpckhps, new IntrinsicInfo(X86Instruction.Unpckhps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpcklpd, new IntrinsicInfo(X86Instruction.Unpcklpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Vfmadd231pd, new IntrinsicInfo(X86Instruction.Vfmadd231pd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231pd, new IntrinsicInfo(X86Instruction.Vfnmadd231pd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231ps, new IntrinsicInfo(X86Instruction.Vfnmadd231ps, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231sd, new IntrinsicInfo(X86Instruction.Vfnmadd231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vpternlogd, new IntrinsicInfo(X86Instruction.Vpternlogd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary));
+#pragma warning restore IDE0055
+ }
+
+ private static void Add(Intrinsic intrin, IntrinsicInfo info)
+ {
+ _intrinTable[(int)intrin] = info;
+ }
+
+ public static IntrinsicInfo GetInfo(Intrinsic intrin)
+ {
+ return _intrinTable[(int)intrin];
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs
new file mode 100644
index 0000000..7c3ef35
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum IntrinsicType
+ {
+ Comis_,
+ Mxcsr,
+ PopCount,
+ Unary,
+ UnaryToGpr,
+ Binary,
+ BinaryGpr,
+ BinaryImm,
+ Crc32,
+ Ternary,
+ TernaryImm,
+ Fma,
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/Mxcsr.cs b/src/ARMeilleure/CodeGen/X86/Mxcsr.cs
new file mode 100644
index 0000000..719afe5
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/Mxcsr.cs
@@ -0,0 +1,15 @@
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ [Flags]
+ enum Mxcsr
+ {
+ Ftz = 1 << 15, // Flush To Zero.
+ Rhi = 1 << 14, // Round Mode high bit.
+ Rlo = 1 << 13, // Round Mode low bit.
+ Um = 1 << 11, // Underflow Mask.
+ Dm = 1 << 8, // Denormal Mask.
+ Daz = 1 << 6, // Denormals Are Zero.
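+
+ // Note: Rhi and Rlo together form the MXCSR rounding-control field (bits 14:13).
+ // Per the SSE documentation: 00b = round to nearest even, 01b = round down,
+ // 10b = round up, 11b = round toward zero.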
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocator.cs b/src/ARMeilleure/CodeGen/X86/PreAllocator.cs
new file mode 100644
index 0000000..590c35c
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -0,0 +1,788 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class PreAllocator
+ {
+ public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+ {
+ maxCallArgs = -1;
+
+ Span<Operation> buffer = default;
+
+ CallConvName callConv = CallingConvention.GetCurrentCallConv();
+
+ Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+ for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ if (node.Instruction == Instruction.Phi)
+ {
+ continue;
+ }
+
+ InsertConstantRegCopies(block.Operations, node);
+ InsertDestructiveRegCopies(block.Operations, node);
+ InsertConstrainedRegCopies(block.Operations, node);
+
+ switch (node.Instruction)
+ {
+ case Instruction.Call:
+ // Get the maximum number of arguments used on a call.
+ // On Windows, when a struct is returned from the call,
+ // we also need to pass, as the first argument, the pointer
+ // where the struct should be written.
+ int argsCount = node.SourcesCount - 1;
+
+ if (node.Destination != default && node.Destination.Type == OperandType.V128)
+ {
+ argsCount++;
+ }
+
+ if (maxCallArgs < argsCount)
+ {
+ maxCallArgs = argsCount;
+ }
+
+ // Copy values to registers expected by the function
+ // being called, as mandated by the ABI.
+ if (callConv == CallConvName.Windows)
+ {
+ PreAllocatorWindows.InsertCallCopies(block.Operations, stackAlloc, node);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ PreAllocatorSystemV.InsertCallCopies(block.Operations, node);
+ }
+ break;
+
+ case Instruction.ConvertToFPUI:
+ GenerateConvertToFPUI(block.Operations, node);
+ break;
+
+ case Instruction.LoadArgument:
+ if (callConv == CallConvName.Windows)
+ {
+ nextNode = PreAllocatorWindows.InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ nextNode = PreAllocatorSystemV.InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node);
+ }
+ break;
+
+ case Instruction.Negate:
+ if (!node.GetSource(0).Type.IsInteger())
+ {
+ GenerateNegate(block.Operations, node);
+ }
+ break;
+
+ case Instruction.Return:
+ if (callConv == CallConvName.Windows)
+ {
+ PreAllocatorWindows.InsertReturnCopy(cctx, block.Operations, preservedArgs, node);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ PreAllocatorSystemV.InsertReturnCopy(block.Operations, node);
+ }
+ break;
+
+ case Instruction.Tailcall:
+ if (callConv == CallConvName.Windows)
+ {
+ PreAllocatorWindows.InsertTailcallCopies(block.Operations, node);
+ }
+ else
+ {
+ PreAllocatorSystemV.InsertTailcallCopies(block.Operations, node);
+ }
+ break;
+
+ case Instruction.VectorInsert8:
+ if (!HardwareCapabilities.SupportsSse41)
+ {
+ GenerateVectorInsert8(block.Operations, node);
+ }
+ break;
+
+ case Instruction.Extended:
+ if (node.Intrinsic == Intrinsic.X86Ldmxcsr)
+ {
+ int stackOffset = stackAlloc.Allocate(OperandType.I32);
+
+ node.SetSources(new Operand[] { Const(stackOffset), node.GetSource(0) });
+ }
+ else if (node.Intrinsic == Intrinsic.X86Stmxcsr)
+ {
+ int stackOffset = stackAlloc.Allocate(OperandType.I32);
+
+ node.SetSources(new Operand[] { Const(stackOffset) });
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ protected static void InsertConstantRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0 || IsXmmIntrinsic(node))
+ {
+ return;
+ }
+
+ Instruction inst = node.Instruction;
+
+ Operand src1 = node.GetSource(0);
+ Operand src2;
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ if (!src1.Type.IsInteger())
+ {
+ // Handle non-integer types (FP32, FP64 and V128).
+ // For instructions without an immediate operand, we do the following:
+ // - Insert a copy with the constant value (as integer) to a GPR.
+ // - Insert a copy from the GPR to a XMM register.
+ // - Replace the constant use with the XMM register.
+ src1 = AddXmmCopy(nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ else if (!HasConstSrc1(inst))
+ {
+ // Handle integer types.
+ // Most ALU instructions accept a 32-bits immediate on the second operand.
+ // We need to ensure the following:
+ // - If the constant is on operand 1, we need to move it.
+ // -- But first, we try to swap operand 1 and 2 if the instruction is commutative.
+ // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+ // - If the constant is on operand 2, we check if the instruction supports it,
+ // if not, we also add a copy. 64-bits constants are usually not supported.
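+ // For example, an add of the form "x = 10 + y" becomes "x = y + 10" after the
+ // swap, letting the 10 be encoded as an immediate instead of needing a GPR copy.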
+ if (IsCommutative(node))
+ {
+ src2 = node.GetSource(1);
+
+ (src2, src1) = (src1, src2);
+
+ node.SetSource(0, src1);
+ node.SetSource(1, src2);
+ }
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ src1 = AddCopy(nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ }
+ }
+
+ if (node.SourcesCount < 2)
+ {
+ return;
+ }
+
+ src2 = node.GetSource(1);
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ if (!src2.Type.IsInteger())
+ {
+ src2 = AddXmmCopy(nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ else if (!HasConstSrc2(inst) || CodeGenCommon.IsLongConst(src2))
+ {
+ src2 = AddCopy(nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ }
+ }
+
+ protected static void InsertConstrainedRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operand dest = node.Destination;
+
+ switch (node.Instruction)
+ {
+ case Instruction.CompareAndSwap:
+ case Instruction.CompareAndSwap16:
+ case Instruction.CompareAndSwap8:
+ {
+ OperandType type = node.GetSource(1).Type;
+
+ if (type == OperandType.V128)
+ {
+ // Handle the many restrictions of the compare and exchange (16 bytes) instruction:
+ // - The expected value should be in RDX:RAX.
+ // - The new value to be written should be in RCX:RBX.
+ // - The value at the memory location is loaded to RDX:RAX.
+ void SplitOperand(Operand source, Operand lr, Operand hr)
+ {
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, lr, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, hr, source, Const(1)));
+ }
+
+ Operand rax = Gpr(X86Register.Rax, OperandType.I64);
+ Operand rbx = Gpr(X86Register.Rbx, OperandType.I64);
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+ Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
+
+ SplitOperand(node.GetSource(1), rax, rdx);
+ SplitOperand(node.GetSource(2), rbx, rcx);
+
+ Operation operation = node;
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, rax));
+ nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1)));
+
+ operation.SetDestinations(new Operand[] { rdx, rax });
+ operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx });
+ }
+ else
+ {
+ // Handle the many restrictions of the compare and exchange (32/64) instruction:
+ // - The expected value should be in (E/R)AX.
+ // - The value at the memory location is loaded to (E/R)AX.
+ Operand expected = node.GetSource(1);
+ Operand newValue = node.GetSource(2);
+
+ Operand rax = Gpr(X86Register.Rax, expected.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rax, expected));
+
+ // We need to store the new value into a temp, since it may
+ // be a constant, and this instruction does not support immediate operands.
+ Operand temp = Local(newValue.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, newValue));
+
+ node.SetSources(new Operand[] { node.GetSource(0), rax, temp });
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, rax));
+
+ node.Destination = rax;
+ }
+
+ break;
+ }
+
+ case Instruction.Divide:
+ case Instruction.DivideUI:
+ {
+ // Handle the many restrictions of the division instructions:
+ // - The dividend is always in RDX:RAX.
+ // - The result is always in RAX.
+ // - Additionally it also writes the remainder in RDX.
+ if (dest.Type.IsInteger())
+ {
+ Operand src1 = node.GetSource(0);
+
+ Operand rax = Gpr(X86Register.Rax, src1.Type);
+ Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rax, src1));
+ nodes.AddBefore(node, Operation(Instruction.Clobber, rdx));
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, rax));
+
+ node.SetSources(new Operand[] { rdx, rax, node.GetSource(1) });
+ node.Destination = rax;
+ }
+
+ break;
+ }
+
+ case Instruction.Extended:
+ {
+ bool isBlend = node.Intrinsic == Intrinsic.X86Blendvpd ||
+ node.Intrinsic == Intrinsic.X86Blendvps ||
+ node.Intrinsic == Intrinsic.X86Pblendvb;
+
+ // BLENDVPD, BLENDVPS, PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
+ // SHA256RNDS2 always has an implied XMM0 as a last operand.
+ if ((isBlend && !HardwareCapabilities.SupportsVexEncoding) || node.Intrinsic == Intrinsic.X86Sha256Rnds2)
+ {
+ Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, xmm0, node.GetSource(2)));
+
+ node.SetSource(2, xmm0);
+ }
+
+ break;
+ }
+
+ case Instruction.Multiply64HighSI:
+ case Instruction.Multiply64HighUI:
+ {
+ // Handle the many restrictions of the i64 * i64 = i128 multiply instructions:
+ // - The multiplicand is always in RAX.
+ // - The lower 64-bits of the result is always in RAX.
+ // - The higher 64-bits of the result is always in RDX.
+ Operand src1 = node.GetSource(0);
+
+ Operand rax = Gpr(X86Register.Rax, src1.Type);
+ Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rax, src1));
+
+ node.SetSource(0, rax);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, rdx));
+
+ node.SetDestinations(new Operand[] { rdx, rax });
+
+ break;
+ }
+
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ {
+ // The shift register is always implied to be CL (low 8-bits of RCX or ECX).
+ if (node.GetSource(1).Kind == OperandKind.LocalVariable)
+ {
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I32);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rcx, node.GetSource(1)));
+
+ node.SetSource(1, rcx);
+ }
+
+ break;
+ }
+ }
+ }
+
+ protected static void InsertDestructiveRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.Destination == default || node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Instruction inst = node.Instruction;
+
+ Operand dest = node.Destination;
+ Operand src1 = node.GetSource(0);
+
+ // The multiply instruction (that maps to IMUL) is somewhat special: it has
+ // a three-operand form where the second source is an immediate value.
+ bool threeOperandForm = inst == Instruction.Multiply && node.GetSource(1).Kind == OperandKind.Constant;
+
+ if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm)
+ {
+ bool useNewLocal = false;
+
+ for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ if (node.GetSource(srcIndex) == dest)
+ {
+ useNewLocal = true;
+
+ break;
+ }
+ }
+
+ if (useNewLocal)
+ {
+ // Dest is being used as some source already, we need to use a new
+ // local to store the temporary value, otherwise the value on dest
+ // local would be overwritten.
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));
+
+ node.SetSource(0, temp);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
+
+ node.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1));
+
+ node.SetSource(0, dest);
+ }
+ }
+ else if (inst == Instruction.ConditionalSelect)
+ {
+ Operand src2 = node.GetSource(1);
+ Operand src3 = node.GetSource(2);
+
+ if (src1 == dest || src2 == dest)
+ {
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, src3));
+
+ node.SetSource(2, temp);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
+
+ node.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, Operation(Instruction.Copy, dest, src3));
+
+ node.SetSource(2, dest);
+ }
+ }
+ }
+
+ private static void GenerateConvertToFPUI(IntrusiveList<Operation> nodes, Operation node)
+ {
+ // Unsigned integer to FP conversions are not supported on X86.
+ // We need to turn them into signed integer to FP conversions, and
+ // adjust the final result.
+ Operand dest = node.Destination;
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\".");
+
+ Operation currentNode = node;
+
+ if (source.Type == OperandType.I32)
+ {
+ // For 32-bits integers, we can just zero-extend to 64-bits,
+ // and then use the 64-bits signed conversion instructions.
+ Operand zex = Local(OperandType.I64);
+
+ node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend32, zex, source));
+ nodes.AddAfter(node, Operation(Instruction.ConvertToFP, dest, zex));
+ }
+ else /* if (source.Type == OperandType.I64) */
+ {
+ // For 64-bits integers, we need to do the following:
+ // - Ensure that the integer has the most significant bit clear.
+ // -- This can be done by shifting the value right by 1, that is, dividing by 2.
+ // -- The least significant bit is lost in this case though.
+ // - We can then convert the shifted value with a signed integer instruction.
+ // - The result still needs to be corrected after that.
+ // -- First, we need to multiply the result by 2, as we divided it by 2 before.
+ // --- This can be done efficiently by adding the result to itself.
+ // -- Then, we need to add the least significant bit that was shifted out.
+ // --- We can convert the least significant bit to float, and add it to the result.
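+ // As an illustrative check: for source = 0xFFFF_FFFF_FFFF_FFFF, half is
+ // 0x7FFF_FFFF_FFFF_FFFF and lsb is 1, so the result is
+ // ToFP(half) + ToFP(half) + ToFP(1), which rounds to 2^64 as expected.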
+ Operand lsb = Local(OperandType.I64);
+ Operand half = Local(OperandType.I64);
+
+ Operand lsbF = Local(dest.Type);
+
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, lsb, source));
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, half, source));
+
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseAnd, lsb, lsb, Const(1L)));
+ node = nodes.AddAfter(node, Operation(Instruction.ShiftRightUI, half, half, Const(1)));
+
+ node = nodes.AddAfter(node, Operation(Instruction.ConvertToFP, lsbF, lsb));
+ node = nodes.AddAfter(node, Operation(Instruction.ConvertToFP, dest, half));
+
+ node = nodes.AddAfter(node, Operation(Instruction.Add, dest, dest, dest));
+ nodes.AddAfter(node, Operation(Instruction.Add, dest, dest, lsbF));
+ }
+
+ Delete(nodes, currentNode);
+ }
+
+ private static void GenerateNegate(IntrusiveList<Operation> nodes, Operation node)
+ {
+ // There's no SSE FP negate instruction, so we need to transform that into
+ // a XOR of the value to be negated with a mask with the highest bit set.
+ // This also produces -0 for a negation of the value 0.
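+ // For example, with FP32 the mask built below is 0x80000000 per lane, so
+ // 1.0f (0x3F800000) XOR mask = 0xBF800000 = -1.0f.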
+ Operand dest = node.Destination;
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 ||
+ dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\".");
+
+ Operation currentNode = node;
+
+ Operand res = Local(dest.Type);
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorOne, res));
+
+ if (dest.Type == OperandType.FP32)
+ {
+ node = nodes.AddAfter(node, Operation(Intrinsic.X86Pslld, res, res, Const(31)));
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ node = nodes.AddAfter(node, Operation(Intrinsic.X86Psllq, res, res, Const(63)));
+ }
+
+ node = nodes.AddAfter(node, Operation(Intrinsic.X86Xorps, res, res, source));
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, res));
+
+ Delete(nodes, currentNode);
+ }
+
+ private static void GenerateVectorInsert8(IntrusiveList<Operation> nodes, Operation node)
+ {
+ // Handle vector insertion, when SSE 4.1 is not supported.
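+ // The approach: extract the 16-bit word containing the target byte, merge the
+ // new byte into its low or high half, then insert the word back. For example,
+ // index 5 maps to word 2 (5 >> 1) and, being odd, to that word's high byte.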
+ Operand dest = node.Destination;
+ Operand src1 = node.GetSource(0); // Vector
+ Operand src2 = node.GetSource(1); // Value
+ Operand src3 = node.GetSource(2); // Index
+
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ Debug.Assert(index < 16);
+
+ Operation currentNode = node;
+
+ Operand temp1 = Local(OperandType.I32);
+ Operand temp2 = Local(OperandType.I32);
+
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, temp2, src2));
+
+ Operation vextOp = Operation(Instruction.VectorExtract16, temp1, src1, Const(index >> 1));
+
+ node = nodes.AddAfter(node, vextOp);
+
+ if ((index & 1) != 0)
+ {
+ node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend8, temp1, temp1));
+ node = nodes.AddAfter(node, Operation(Instruction.ShiftLeft, temp2, temp2, Const(8)));
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+ }
+ else
+ {
+ node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend8, temp2, temp2));
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseAnd, temp1, temp1, Const(0xff00)));
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+ }
+
+ Operation vinsOp = Operation(Instruction.VectorInsert16, dest, src1, temp1, Const(index >> 1));
+
+ nodes.AddAfter(node, vinsOp);
+
+ Delete(nodes, currentNode);
+ }
+
+ protected static Operand AddXmmCopy(IntrusiveList<Operation> nodes, Operation node, Operand source)
+ {
+ Operand temp = Local(source.Type);
+ Operand intConst = AddCopy(nodes, node, GetIntConst(source));
+
+ Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst);
+
+ nodes.AddBefore(node, copyOp);
+
+ return temp;
+ }
+
+ protected static Operand AddCopy(IntrusiveList<Operation> nodes, Operation node, Operand source)
+ {
+ Operand temp = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, temp, source);
+
+ nodes.AddBefore(node, copyOp);
+
+ return temp;
+ }
+
+ private static Operand GetIntConst(Operand value)
+ {
+ if (value.Type == OperandType.FP32)
+ {
+ return Const(value.AsInt32());
+ }
+ else if (value.Type == OperandType.FP64)
+ {
+ return Const(value.AsInt64());
+ }
+
+ return value;
+ }
+
+ protected static void Delete(IntrusiveList<Operation> nodes, Operation node)
+ {
+ node.Destination = default;
+
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ node.SetSource(index, default);
+ }
+
+ nodes.Remove(node);
+ }
+
+ protected static Operand Gpr(X86Register register, OperandType type)
+ {
+ return Register((int)register, RegisterType.Integer, type);
+ }
+
+ protected static Operand Xmm(X86Register register, OperandType type)
+ {
+ return Register((int)register, RegisterType.Vector, type);
+ }
+
+ private static bool IsSameOperandDestSrc1(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger();
+ case Instruction.Multiply:
+ case Instruction.Subtract:
+ return !HardwareCapabilities.SupportsVexEncoding || operation.Destination.Type.IsInteger();
+
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseNot:
+ case Instruction.BitwiseOr:
+ case Instruction.ByteSwap:
+ case Instruction.Negate:
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ return true;
+
+ case Instruction.Divide:
+ return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger();
+
+ case Instruction.VectorInsert:
+ case Instruction.VectorInsert16:
+ case Instruction.VectorInsert8:
+ return !HardwareCapabilities.SupportsVexEncoding;
+
+ case Instruction.Extended:
+ return IsIntrinsicSameOperandDestSrc1(operation);
+ }
+
+ return IsVexSameOperandDestSrc1(operation);
+ }
+
+ private static bool IsIntrinsicSameOperandDestSrc1(Operation operation)
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ return info.Type == IntrinsicType.Crc32 || info.Type == IntrinsicType.Fma || IsVexSameOperandDestSrc1(operation);
+ }
+
+ private static bool IsVexSameOperandDestSrc1(Operation operation)
+ {
+ if (IsIntrinsic(operation.Instruction))
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ bool hasVex = HardwareCapabilities.SupportsVexEncoding && Assembler.SupportsVexPrefix(info.Inst);
+
+ bool isUnary = operation.SourcesCount < 2;
+
+ bool hasVecDest = operation.Destination != default && operation.Destination.Type == OperandType.V128;
+
+ return !hasVex && !isUnary && hasVecDest;
+ }
+
+ return false;
+ }
+
+ private static bool HasConstSrc1(Instruction inst)
+ {
+ return inst switch
+ {
+ Instruction.Copy or Instruction.LoadArgument or Instruction.Spill or Instruction.SpillArg => true,
+ _ => false,
+ };
+ }
+
+ private static bool HasConstSrc2(Instruction inst)
+ {
+ switch (inst)
+ {
+ case Instruction.Add:
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ case Instruction.Multiply:
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ case Instruction.Store:
+ case Instruction.Store16:
+ case Instruction.Store8:
+ case Instruction.Subtract:
+ case Instruction.VectorExtract:
+ case Instruction.VectorExtract16:
+ case Instruction.VectorExtract8:
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool IsCommutative(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ case Instruction.Multiply:
+ return true;
+
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var compType = (Comparison)comp.AsInt32();
+
+ return compType == Comparison.Equal || compType == Comparison.NotEqual;
+ }
+ }
+
+ return false;
+ }
+
+ private static bool IsIntrinsic(Instruction inst)
+ {
+ return inst == Instruction.Extended;
+ }
+
+ private static bool IsXmmIntrinsic(Operation operation)
+ {
+ if (operation.Instruction != Instruction.Extended)
+ {
+ return false;
+ }
+
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ return info.Type != IntrinsicType.Crc32;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs b/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs
new file mode 100644
index 0000000..e754cb0
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs
@@ -0,0 +1,333 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class PreAllocatorSystemV : PreAllocator
+ {
+ public static void InsertCallCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operand dest = node.Destination;
+
+ List<Operand> sources = new()
+ {
+ node.GetSource(0),
+ };
+
+ int argsCount = node.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ int stackOffset = 0;
+
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(index + 1);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < intMax;
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct, we pass each half on a GPR if possible.
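+ // For example (assuming the System V integer argument order RDI, RSI, RDX,
+ // RCX, R8, R9), a V128 passed as the first argument would have its low
+ // half in RDI and its high half in RSI.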
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ Operand offset = Const(stackOffset);
+
+ Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, spillOp));
+
+ stackOffset += source.Type.GetSizeInBytes();
+ }
+ }
+
+ node.SetSources(sources.ToArray());
+
+ if (dest != default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ Operation operation = node;
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
+ nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+
+ operation.Destination = default;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+
+ nodes.AddAfter(node, copyOp);
+
+ node.Destination = retReg;
+ }
+ }
+ }
+
+ public static void InsertTailcallCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ List<Operand> sources = new()
+ {
+ node.GetSource(0),
+ };
+
+ int argsCount = node.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(1 + index);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct, we pass each half on a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+ }
+ }
+
+ // The target address must be on the return registers, since we
+ // don't return anything and it is guaranteed to not be a
+ // callee saved register (which would be trashed on the epilogue).
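+ // (Illustrative: with RAX as the integer return register, the tail call
+ // ends up as a "jmp rax" once the epilogue has restored callee-saved state.)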
+ Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+
+ Operation addrCopyOp = Operation(Instruction.Copy, retReg, node.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = retReg;
+
+ node.SetSources(sources.ToArray());
+ }
+
+ public static Operation InsertLoadArgumentCopy(
+ CompilerContext cctx,
+ ref Span<Operation> buffer,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ int index = source.AsInt32();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ for (int cIndex = 0; cIndex < index; cIndex++)
+ {
+ OperandType argType = cctx.FuncArgTypes[cIndex];
+
+ if (argType.IsInteger())
+ {
+ intCount++;
+ }
+ else if (argType == OperandType.V128)
+ {
+ intCount += 2;
+ }
+ else
+ {
+ vecCount++;
+ }
+ }
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < CallingConvention.GetIntArgumentsOnRegsCount();
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < CallingConvention.GetIntArgumentsOnRegsCount();
+ }
+ else
+ {
+ passOnReg = vecCount < CallingConvention.GetVecArgumentsOnRegsCount();
+ }
+
+ if (passOnReg)
+ {
+ Operand dest = node.Destination;
+
+ if (preservedArgs[index] == default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ // V128 is a struct, we pass each half on a GPR if possible.
+ Operand pArg = Local(OperandType.V128);
+
+ Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
+ Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
+
+ Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg);
+ Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyH);
+ cctx.Cfg.Entry.Operations.AddFirst(copyL);
+
+ preservedArgs[index] = pArg;
+ }
+ else
+ {
+ Operand pArg = Local(dest.Type);
+
+ Operand argReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+ }
+
+ Operation nextNode;
+
+ if (dest.AssignmentsCount == 1)
+ {
+ // Let's propagate the argument if we can to avoid copies.
+ PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]);
+ nextNode = node.ListNext;
+ }
+ else
+ {
+ Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]);
+ nextNode = nodes.AddBefore(node, argCopyOp);
+ }
+
+ Delete(nodes, node);
+ return nextNode;
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ return node;
+ }
+ }
+
+ public static void InsertReturnCopy(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand source = node.GetSource(0);
+
+ if (source.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
+ }
+ else
+ {
+ Operand retReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
+
+ Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
+
+ nodes.AddBefore(node, retCopyOp);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs b/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs
new file mode 100644
index 0000000..10a2bd1
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs
@@ -0,0 +1,327 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class PreAllocatorWindows : PreAllocator
+ {
+ public static void InsertCallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node)
+ {
+ Operand dest = node.Destination;
+
+ // Handle struct arguments.
+ int retArgs = 0;
+ int stackAllocOffset = 0;
+
+ int AllocateOnStack(int size)
+ {
+ // We assume that the stack allocator is initially empty (TotalSize = 0).
+ // Taking that into account, we can reuse the space allocated for other
+ // calls by keeping track of our own allocated size (stackAllocOffset).
+ // If the space allocated is not big enough, then we just expand it.
+ int offset = stackAllocOffset;
+
+ if (stackAllocOffset + size > stackAlloc.TotalSize)
+ {
+ stackAlloc.Allocate((stackAllocOffset + size) - stackAlloc.TotalSize);
+ }
+
+ stackAllocOffset += size;
+
+ return offset;
+ }
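+
+ // Illustrative behavior: two AllocateOnStack(16) calls at the same call site
+ // return offsets 0 and 16; a later call site starts again at offset 0 and
+ // only grows stackAlloc.TotalSize if it needs more space than any previous one.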
+
+ Operand arg0Reg = default;
+
+ if (dest != default && dest.Type == OperandType.V128)
+ {
+ int stackOffset = AllocateOnStack(dest.Type.GetSizeInBytes());
+
+ arg0Reg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+ Operation allocOp = Operation(Instruction.StackAlloc, arg0Reg, Const(stackOffset));
+
+ nodes.AddBefore(node, allocOp);
+
+ retArgs = 1;
+ }
+
+ int argsCount = node.SourcesCount - 1;
+ int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs;
+
+ if (argsCount > maxArgs)
+ {
+ argsCount = maxArgs;
+ }
+
+ Operand[] sources = new Operand[1 + retArgs + argsCount];
+
+ sources[0] = node.GetSource(0);
+
+ if (arg0Reg != default)
+ {
+ sources[1] = arg0Reg;
+ }
+
+ for (int index = 1; index < node.SourcesCount; index++)
+ {
+ Operand source = node.GetSource(index);
+
+ if (source.Type == OperandType.V128)
+ {
+ Operand stackAddr = Local(OperandType.I64);
+
+ int stackOffset = AllocateOnStack(source.Type.GetSizeInBytes());
+
+ nodes.AddBefore(node, Operation(Instruction.StackAlloc, stackAddr, Const(stackOffset)));
+
+ Operation storeOp = Operation(Instruction.Store, default, stackAddr, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, storeOp));
+
+ node.SetSource(index, stackAddr);
+ }
+ }
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(index + 1);
+ Operand argReg;
+
+ int argIndex = index + retArgs;
+
+ if (source.Type.IsInteger())
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(argIndex), source.Type);
+ }
+ else
+ {
+ argReg = Xmm(CallingConvention.GetVecArgumentRegister(argIndex), source.Type);
+ }
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources[1 + retArgs + index] = argReg;
+ }
+
+ // The remaining arguments (those that are not passed on registers)
+ // should be passed on the stack, we write them to the stack with "SpillArg".
+ for (int index = argsCount; index < node.SourcesCount - 1; index++)
+ {
+ Operand source = node.GetSource(index + 1);
+ Operand offset = Const((index + retArgs) * 8);
+
+ Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, spillOp));
+ }
+
+ if (dest != default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ Operand retValueAddr = Local(OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, retValueAddr, arg0Reg));
+
+ Operation loadOp = Operation(Instruction.Load, dest, retValueAddr);
+
+ nodes.AddAfter(node, loadOp);
+
+ node.Destination = default;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+
+ nodes.AddAfter(node, copyOp);
+
+ node.Destination = retReg;
+ }
+ }
+
+ node.SetSources(sources);
+ }
+
+ public static void InsertTailcallCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ int argsCount = node.SourcesCount - 1;
+ int maxArgs = CallingConvention.GetArgumentsOnRegsCount();
+
+ if (argsCount > maxArgs)
+ {
+ throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+ }
+
+ Operand[] sources = new Operand[1 + argsCount];
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(1 + index);
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(index), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(index), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources[1 + index] = argReg;
+ }
+
+ // The target address must be on the return registers, since we
+ // don't return anything and it is guaranteed to not be a
+ // callee saved register (which would be trashed on the epilogue).
+ Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+
+ Operation addrCopyOp = Operation(Instruction.Copy, retReg, node.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = retReg;
+
+ node.SetSources(sources);
+ }
+
+ public static Operation InsertLoadArgumentCopy(
+ CompilerContext cctx,
+ ref Span<Operation> buffer,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ int retArgs = cctx.FuncReturnType == OperandType.V128 ? 1 : 0;
+
+ int index = source.AsInt32() + retArgs;
+
+ if (index < CallingConvention.GetArgumentsOnRegsCount())
+ {
+ Operand dest = node.Destination;
+
+ if (preservedArgs[index] == default)
+ {
+ Operand argReg, pArg;
+
+ if (dest.Type.IsInteger())
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), dest.Type);
+ pArg = Local(dest.Type);
+ }
+ else if (dest.Type == OperandType.V128)
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), OperandType.I64);
+ pArg = Local(OperandType.I64);
+ }
+ else
+ {
+ argReg = Xmm(CallingConvention.GetVecArgumentRegister(index), dest.Type);
+ pArg = Local(dest.Type);
+ }
+
+ Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+
+ Operation nextNode;
+
+ if (dest.Type != OperandType.V128 && dest.AssignmentsCount == 1)
+ {
+ // Let's propagate the argument if we can to avoid copies.
+ PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]);
+ nextNode = node.ListNext;
+ }
+ else
+ {
+ Operation argCopyOp = Operation(dest.Type == OperandType.V128
+ ? Instruction.Load
+ : Instruction.Copy, dest, preservedArgs[index]);
+
+ nextNode = nodes.AddBefore(node, argCopyOp);
+ }
+
+ Delete(nodes, node);
+ return nextNode;
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ return node;
+ }
+ }
+
+ public static void InsertReturnCopy(
+ CompilerContext cctx,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ if (node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand source = node.GetSource(0);
+ Operand retReg;
+
+ if (source.Type.IsInteger())
+ {
+ retReg = Gpr(CallingConvention.GetIntReturnRegister(), source.Type);
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ if (preservedArgs[0] == default)
+ {
+ Operand preservedArg = Local(OperandType.I64);
+ Operand arg0 = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+ Operation copyOp = Operation(Instruction.Copy, preservedArg, arg0);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[0] = preservedArg;
+ }
+
+ retReg = preservedArgs[0];
+ }
+ else
+ {
+ retReg = Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
+ }
+
+ if (source.Type == OperandType.V128)
+ {
+ Operation retStoreOp = Operation(Instruction.Store, default, retReg, source);
+
+ nodes.AddBefore(node, retStoreOp);
+ }
+ else
+ {
+ Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
+
+ nodes.AddBefore(node, retCopyOp);
+ }
+
+ node.SetSources(Array.Empty<Operand>());
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/X86Condition.cs b/src/ARMeilleure/CodeGen/X86/X86Condition.cs
new file mode 100644
index 0000000..70699a2
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Condition.cs
@@ -0,0 +1,49 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Condition
+ {
+ Overflow = 0x0,
+ NotOverflow = 0x1,
+ Below = 0x2,
+ AboveOrEqual = 0x3,
+ Equal = 0x4,
+ NotEqual = 0x5,
+ BelowOrEqual = 0x6,
+ Above = 0x7,
+ Sign = 0x8,
+ NotSign = 0x9,
+ ParityEven = 0xa,
+ ParityOdd = 0xb,
+ Less = 0xc,
+ GreaterOrEqual = 0xd,
+ LessOrEqual = 0xe,
+ Greater = 0xf,
+ }
+
+ static class ComparisonX86Extensions
+ {
+ public static X86Condition ToX86Condition(this Comparison comp)
+ {
+ return comp switch
+ {
+#pragma warning disable IDE0055 // Disable formatting
+ Comparison.Equal => X86Condition.Equal,
+ Comparison.NotEqual => X86Condition.NotEqual,
+ Comparison.Greater => X86Condition.Greater,
+ Comparison.LessOrEqual => X86Condition.LessOrEqual,
+ Comparison.GreaterUI => X86Condition.Above,
+ Comparison.LessOrEqualUI => X86Condition.BelowOrEqual,
+ Comparison.GreaterOrEqual => X86Condition.GreaterOrEqual,
+ Comparison.Less => X86Condition.Less,
+ Comparison.GreaterOrEqualUI => X86Condition.AboveOrEqual,
+ Comparison.LessUI => X86Condition.Below,
+#pragma warning restore IDE0055
+
+ _ => throw new ArgumentException(null, nameof(comp)),
+ };
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/X86Instruction.cs b/src/ARMeilleure/CodeGen/X86/X86Instruction.cs
new file mode 100644
index 0000000..e197901
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -0,0 +1,231 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Instruction
+ {
+ None,
+ Add,
+ Addpd,
+ Addps,
+ Addsd,
+ Addss,
+ Aesdec,
+ Aesdeclast,
+ Aesenc,
+ Aesenclast,
+ Aesimc,
+ And,
+ Andnpd,
+ Andnps,
+ Andpd,
+ Andps,
+ Blendvpd,
+ Blendvps,
+ Bsr,
+ Bswap,
+ Call,
+ Cmovcc,
+ Cmp,
+ Cmppd,
+ Cmpps,
+ Cmpsd,
+ Cmpss,
+ Cmpxchg,
+ Cmpxchg16b,
+ Cmpxchg8,
+ Comisd,
+ Comiss,
+ Crc32,
+ Crc32_16,
+ Crc32_8,
+ Cvtdq2pd,
+ Cvtdq2ps,
+ Cvtpd2dq,
+ Cvtpd2ps,
+ Cvtps2dq,
+ Cvtps2pd,
+ Cvtsd2si,
+ Cvtsd2ss,
+ Cvtsi2sd,
+ Cvtsi2ss,
+ Cvtss2sd,
+ Cvtss2si,
+ Div,
+ Divpd,
+ Divps,
+ Divsd,
+ Divss,
+ Gf2p8affineqb,
+ Haddpd,
+ Haddps,
+ Idiv,
+ Imul,
+ Imul128,
+ Insertps,
+ Jmp,
+ Ldmxcsr,
+ Lea,
+ Maxpd,
+ Maxps,
+ Maxsd,
+ Maxss,
+ Minpd,
+ Minps,
+ Minsd,
+ Minss,
+ Mov,
+ Mov16,
+ Mov8,
+ Movd,
+ Movdqu,
+ Movhlps,
+ Movlhps,
+ Movq,
+ Movsd,
+ Movss,
+ Movsx16,
+ Movsx32,
+ Movsx8,
+ Movzx16,
+ Movzx8,
+ Mul128,
+ Mulpd,
+ Mulps,
+ Mulsd,
+ Mulss,
+ Neg,
+ Not,
+ Or,
+ Paddb,
+ Paddd,
+ Paddq,
+ Paddw,
+ Palignr,
+ Pand,
+ Pandn,
+ Pavgb,
+ Pavgw,
+ Pblendvb,
+ Pclmulqdq,
+ Pcmpeqb,
+ Pcmpeqd,
+ Pcmpeqq,
+ Pcmpeqw,
+ Pcmpgtb,
+ Pcmpgtd,
+ Pcmpgtq,
+ Pcmpgtw,
+ Pextrb,
+ Pextrd,
+ Pextrq,
+ Pextrw,
+ Pinsrb,
+ Pinsrd,
+ Pinsrq,
+ Pinsrw,
+ Pmaxsb,
+ Pmaxsd,
+ Pmaxsw,
+ Pmaxub,
+ Pmaxud,
+ Pmaxuw,
+ Pminsb,
+ Pminsd,
+ Pminsw,
+ Pminub,
+ Pminud,
+ Pminuw,
+ Pmovsxbw,
+ Pmovsxdq,
+ Pmovsxwd,
+ Pmovzxbw,
+ Pmovzxdq,
+ Pmovzxwd,
+ Pmulld,
+ Pmullw,
+ Pop,
+ Popcnt,
+ Por,
+ Pshufb,
+ Pshufd,
+ Pslld,
+ Pslldq,
+ Psllq,
+ Psllw,
+ Psrad,
+ Psraw,
+ Psrld,
+ Psrlq,
+ Psrldq,
+ Psrlw,
+ Psubb,
+ Psubd,
+ Psubq,
+ Psubw,
+ Punpckhbw,
+ Punpckhdq,
+ Punpckhqdq,
+ Punpckhwd,
+ Punpcklbw,
+ Punpckldq,
+ Punpcklqdq,
+ Punpcklwd,
+ Push,
+ Pxor,
+ Rcpps,
+ Rcpss,
+ Ror,
+ Roundpd,
+ Roundps,
+ Roundsd,
+ Roundss,
+ Rsqrtps,
+ Rsqrtss,
+ Sar,
+ Setcc,
+ Sha256Msg1,
+ Sha256Msg2,
+ Sha256Rnds2,
+ Shl,
+ Shr,
+ Shufpd,
+ Shufps,
+ Sqrtpd,
+ Sqrtps,
+ Sqrtsd,
+ Sqrtss,
+ Stmxcsr,
+ Sub,
+ Subpd,
+ Subps,
+ Subsd,
+ Subss,
+ Test,
+ Unpckhpd,
+ Unpckhps,
+ Unpcklpd,
+ Unpcklps,
+ Vblendvpd,
+ Vblendvps,
+ Vcvtph2ps,
+ Vcvtps2ph,
+ Vfmadd231pd,
+ Vfmadd231ps,
+ Vfmadd231sd,
+ Vfmadd231ss,
+ Vfmsub231sd,
+ Vfmsub231ss,
+ Vfnmadd231pd,
+ Vfnmadd231ps,
+ Vfnmadd231sd,
+ Vfnmadd231ss,
+ Vfnmsub231sd,
+ Vfnmsub231ss,
+ Vpblendvb,
+ Vpternlogd,
+ Xor,
+ Xorpd,
+ Xorps,
+
+ Count,
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs b/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs
new file mode 100644
index 0000000..690ca50
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs
@@ -0,0 +1,259 @@
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class X86Optimizer
+ {
+ private const int MaxConstantUses = 10000;
+
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+ var constants = new Dictionary<ulong, Operand>();
+
+ Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
+ {
+ // If the constant has many uses, we also force a new constant mov to be added, in order
+ // to avoid overflow of the counts field (that is limited to 16 bits).
+ if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses)
+ {
+ constant = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, constant, source);
+
+ block.Operations.AddBefore(operation, copyOp);
+
+ constants[source.Value] = constant;
+ }
+
+ return constant;
+ }
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ constants.Clear();
+
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ // Insert copies for constants that can't fit on a 32-bits immediate.
+ // Doing this early unblocks a few optimizations.
+ if (node.Instruction == Instruction.Add)
+ {
+ Operand src1 = node.GetSource(0);
+ Operand src2 = node.GetSource(1);
+
+ if (src1.Kind == OperandKind.Constant && (src1.Relocatable || CodeGenCommon.IsLongConst(src1)))
+ {
+ node.SetSource(0, GetConstantCopy(block, node, src1));
+ }
+
+ if (src2.Kind == OperandKind.Constant && (src2.Relocatable || CodeGenCommon.IsLongConst(src2)))
+ {
+ node.SetSource(1, GetConstantCopy(block, node, src2));
+ }
+ }
+
+ // Try to fold something like:
+ // shl rbx, 2
+ // add rax, rbx
+ // add rax, 0xcafe
+ // mov rax, [rax]
+ // Into:
+ // mov rax, [rax+rbx*4+0xcafe]
+ if (IsMemoryLoadOrStore(node.Instruction))
+ {
+ OperandType type;
+
+ if (node.Destination != default)
+ {
+ type = node.Destination.Type;
+ }
+ else
+ {
+ type = node.GetSource(1).Type;
+ }
+
+ Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);
+
+ if (memOp != default)
+ {
+ node.SetSource(0, memOp);
+ }
+ }
+ }
+ }
+
+ Optimizer.RemoveUnusedNodes(cfg);
+ }
+
+ private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
+ {
+ Operand baseOp = addr;
+
+ // First we check if the address is the result of a local X with 32-bits immediate
+ // addition. If that is the case, then the baseOp is X, and the memory operand immediate
+ // becomes the addition immediate. Otherwise baseOp keeps being the address.
+ int imm = GetConstOp(ref baseOp);
+
+ // Now we check if the baseOp is the result of a local Y with a local Z addition.
+ // If that is the case, we now set baseOp to Y and indexOp to Z. We further check
+ // if Z is the result of a left shift of local W by a value >= 0 and <= 3, if that
+ // is the case, we set indexOp to W and adjust the scale value of the memory operand
+ // to match that of the left shift.
+ // There is one missed case, which is the address being a shift result, but this is
+ // probably not worth optimizing as it should never happen.
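+ // Concretely: for addr = y + (w << 2) + 0x10, baseOp ends up as y, indexOp
+ // as w with scale Multiplier.x4, and imm as 0x10, i.e. [y + w*4 + 0x10].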
+ (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp);
+
+ // If baseOp is still equal to address, then there's nothing that can be optimized.
+ if (baseOp == addr)
+ {
+ return default;
+ }
+
+ if (imm == 0 && scale == Multiplier.x1 && indexOp != default)
+ {
+ imm = GetConstOp(ref indexOp);
+ }
+
+ return MemoryOp(type, baseOp, indexOp, scale, imm);
+ }
+
+ private static int GetConstOp(ref Operand baseOp)
+ {
+ Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (operation == default)
+ {
+ return 0;
+ }
+
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ Operand constOp;
+ Operand otherOp;
+
+ if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable)
+ {
+ constOp = src1;
+ otherOp = src2;
+ }
+ else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant)
+ {
+ constOp = src2;
+ otherOp = src1;
+ }
+ else
+ {
+ return 0;
+ }
+
+ // If we have addition by 64-bits constant, then we can't optimize it further,
+ // as we can't encode a 64-bits immediate on the memory operand.
+ if (CodeGenCommon.IsLongConst(constOp))
+ {
+ return 0;
+ }
+
+ baseOp = otherOp;
+
+ return constOp.AsInt32();
+ }
+
+ private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp)
+ {
+ Operand indexOp = default;
+
+ Multiplier scale = Multiplier.x1;
+
+ Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (addOp == default)
+ {
+ return (indexOp, scale);
+ }
+
+ Operand src1 = addOp.GetSource(0);
+ Operand src2 = addOp.GetSource(1);
+
+ if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable)
+ {
+ return (indexOp, scale);
+ }
+
+ baseOp = src1;
+ indexOp = src2;
+
+ Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft);
+
+ bool indexOnSrc2 = false;
+
+ if (shlOp == default)
+ {
+ shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft);
+
+ indexOnSrc2 = true;
+ }
+
+ if (shlOp != default)
+ {
+ Operand shSrc = shlOp.GetSource(0);
+ Operand shift = shlOp.GetSource(1);
+
+ if (shSrc.Kind == OperandKind.LocalVariable && shift.Kind == OperandKind.Constant && shift.Value <= 3)
+ {
+ scale = shift.Value switch
+ {
+ 1 => Multiplier.x2,
+ 2 => Multiplier.x4,
+ 3 => Multiplier.x8,
+ _ => Multiplier.x1,
+ };
+
+ baseOp = indexOnSrc2 ? src1 : src2;
+ indexOp = shSrc;
+ }
+ }
+
+ return (indexOp, scale);
+ }
+
+ private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
+ {
+ // If we have multiple assignments, folding is not safe
+ // as the value may be different depending on the
+ // control flow path.
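+ // For example, a variable assigned on both sides of an if-else could take
+ // either definition at the use site.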
+ if (op.AssignmentsCount != 1)
+ {
+ return default;
+ }
+
+ Operation asgOp = op.Assignments[0];
+
+ if (asgOp.Instruction != inst)
+ {
+ return default;
+ }
+
+ return asgOp;
+ }
+
+ private static bool IsMemoryLoadOrStore(Instruction inst)
+ {
+ return inst == Instruction.Load ||
+ inst == Instruction.Load16 ||
+ inst == Instruction.Load8 ||
+ inst == Instruction.Store ||
+ inst == Instruction.Store16 ||
+ inst == Instruction.Store8;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/X86Register.cs b/src/ARMeilleure/CodeGen/X86/X86Register.cs
new file mode 100644
index 0000000..0a65636
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Register.cs
@@ -0,0 +1,44 @@
+using System.Diagnostics.CodeAnalysis;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ [SuppressMessage("Design", "CA1069: Enums values should not be duplicated")]
+ enum X86Register
+ {
+ Invalid = -1,
+
+ Rax = 0,
+ Rcx = 1,
+ Rdx = 2,
+ Rbx = 3,
+ Rsp = 4,
+ Rbp = 5,
+ Rsi = 6,
+ Rdi = 7,
+ R8 = 8,
+ R9 = 9,
+ R10 = 10,
+ R11 = 11,
+ R12 = 12,
+ R13 = 13,
+ R14 = 14,
+ R15 = 15,
+
+ Xmm0 = 0,
+ Xmm1 = 1,
+ Xmm2 = 2,
+ Xmm3 = 3,
+ Xmm4 = 4,
+ Xmm5 = 5,
+ Xmm6 = 6,
+ Xmm7 = 7,
+ Xmm8 = 8,
+ Xmm9 = 9,
+ Xmm10 = 10,
+ Xmm11 = 11,
+ Xmm12 = 12,
+ Xmm13 = 13,
+ Xmm14 = 14,
+ Xmm15 = 15,
+ }
+}
diff --git a/src/ARMeilleure/Common/AddressTable.cs b/src/ARMeilleure/Common/AddressTable.cs
new file mode 100644
index 0000000..fcab3a2
--- /dev/null
+++ b/src/ARMeilleure/Common/AddressTable.cs
@@ -0,0 +1,252 @@
+using ARMeilleure.Diagnostics;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Common
+{
+ /// <summary>
+ /// Represents a table mapping guest addresses to values.
+ /// </summary>
+ /// <typeparam name="TEntry">Type of the value</typeparam>
+ public unsafe class AddressTable<TEntry> : IDisposable where TEntry : unmanaged
+ {
+ /// <summary>
+ /// Represents a level in an <see cref="AddressTable{TEntry}"/>.
+ /// </summary>
+ public readonly struct Level
+ {
+ /// <summary>
+ /// Gets the index of the <see cref="Level"/> in the guest address.
+ /// </summary>
+ public int Index { get; }
+
+ /// <summary>
+ /// Gets the length of the <see cref="Level"/> in the guest address.
+ /// </summary>
+ public int Length { get; }
+
+ /// <summary>
+ /// Gets the mask which masks the bits used by the <see cref="Level"/>.
+ /// </summary>
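+ /// <remarks>
+ /// For example, a <see cref="Level"/> with Index 12 and Length 9 covers bits 12-20,
+ /// giving Mask == 0x1FF000.
+ /// </remarks>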
+ public ulong Mask => ((1ul << Length) - 1) << Index;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Level"/> structure with the specified
+ /// <paramref name="index"/> and <paramref name="length"/>.
+ /// </summary>
+ /// <param name="index">Index of the <see cref="Level"/></param>
+ /// <param name="length">Length of the <see cref="Level"/></param>
+ public Level(int index, int length)
+ {
+ (Index, Length) = (index, length);
+ }
+
+ /// <summary>
+ /// Gets the value of the <see cref="Level"/> from the specified guest <paramref name="address"/>.
+ /// </summary>
+ /// <param name="address">Guest address</param>
+ /// <returns>Value of the <see cref="Level"/> from the specified guest <paramref name="address"/></returns>
+ public int GetValue(ulong address)
+ {
+ return (int)((address & Mask) >> Index);
+ }
+ }
+
+ private bool _disposed;
+ private TEntry** _table;
+ private readonly List<IntPtr> _pages;
+
+ /// <summary>
+ /// Gets the bits used by the <see cref="Levels"/> of the <see cref="AddressTable{TEntry}"/> instance.
+ /// </summary>
+ public ulong Mask { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Level"/>s used by the <see cref="AddressTable{TEntry}"/> instance.
+ /// </summary>
+ public Level[] Levels { get; }
+
+ /// <summary>
+ /// Gets or sets the default fill value of newly created leaf pages.
+ /// </summary>
+ public TEntry Fill { get; set; }
+
+ /// <summary>
+ /// Gets the base address of the <see cref="AddressTable{TEntry}"/>.
+ /// </summary>
+ /// <exception cref="ObjectDisposedException"><see cref="AddressTable{TEntry}"/> instance was disposed</exception>
+ public IntPtr Base
+ {
+ get
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ lock (_pages)
+ {
+ return (IntPtr)GetRootPage();
+ }
+ }
+ }
+
+ /// <summary>
+ /// Constructs a new instance of the <see cref="AddressTable{TEntry}"/> class with the specified list of
+ /// <see cref="Level"/>.
+ /// </summary>
+ /// <exception cref="ArgumentNullException"><paramref name="levels"/> is null</exception>
+ /// <exception cref="ArgumentException">Length of <paramref name="levels"/> is less than 2</exception>
+ public AddressTable(Level[] levels)
+ {
+ ArgumentNullException.ThrowIfNull(levels);
+
+ if (levels.Length < 2)
+ {
+ throw new ArgumentException("Table must be at least 2 levels deep.", nameof(levels));
+ }
+
+ _pages = new List<IntPtr>(capacity: 16);
+
+ Levels = levels;
+ Mask = 0;
+
+ foreach (var level in Levels)
+ {
+ Mask |= level.Mask;
+ }
+ }
+
+ /// <summary>
+ /// Determines if the specified <paramref name="address"/> is in the range of the
+ /// <see cref="AddressTable{TEntry}"/>.
+ /// </summary>
+ /// <param name="address">Guest address</param>
+ /// <returns><see langword="true"/> if <paramref name="address"/> is valid; otherwise <see langword="false"/></returns>
+ public bool IsValid(ulong address)
+ {
+ return (address & ~Mask) == 0;
+ }
+
+ /// <summary>
+ /// Gets a reference to the value at the specified guest <paramref name="address"/>.
+ /// </summary>
+ /// <param name="address">Guest address</param>
+ /// <returns>Reference to the value at the specified guest <paramref name="address"/></returns>
+ /// <exception cref="ObjectDisposedException"><see cref="AddressTable{TEntry}"/> instance was disposed</exception>
+ /// <exception cref="ArgumentException"><paramref name="address"/> is not mapped</exception>
+ public ref TEntry GetValue(ulong address)
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ if (!IsValid(address))
+ {
+ throw new ArgumentException($"Address 0x{address:X} is not mapped onto the table.", nameof(address));
+ }
+
+ lock (_pages)
+ {
+ return ref GetPage(address)[Levels[^1].GetValue(address)];
+ }
+ }
+
+ /// <summary>
+ /// Gets the leaf page for the specified guest <paramref name="address"/>.
+ /// </summary>
+ /// <param name="address">Guest address</param>
+ /// <returns>Leaf page for the specified guest <paramref name="address"/></returns>
+ private TEntry* GetPage(ulong address)
+ {
+ TEntry** page = GetRootPage();
+
+ for (int i = 0; i < Levels.Length - 1; i++)
+ {
+ ref Level level = ref Levels[i];
+ ref TEntry* nextPage = ref page[level.GetValue(address)];
+
+ if (nextPage == null)
+ {
+ ref Level nextLevel = ref Levels[i + 1];
+
+ nextPage = i == Levels.Length - 2 ?
+ (TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true) :
+ (TEntry*)Allocate(1 << nextLevel.Length, IntPtr.Zero, leaf: false);
+ }
+
+ page = (TEntry**)nextPage;
+ }
+
+ return (TEntry*)page;
+ }
+
+ /// <summary>
+ /// Lazily initialize and get the root page of the <see cref="AddressTable{TEntry}"/>.
+ /// </summary>
+ /// <returns>Root page of the <see cref="AddressTable{TEntry}"/></returns>
+ private TEntry** GetRootPage()
+ {
+ if (_table == null)
+ {
+ _table = (TEntry**)Allocate(1 << Levels[0].Length, fill: IntPtr.Zero, leaf: false);
+ }
+
+ return _table;
+ }
+
+ /// <summary>
+ /// Allocates a block of memory of the specified type and length.
+ /// </summary>
+ /// <typeparam name="T">Type of elements</typeparam>
+ /// <param name="length">Number of elements</param>
+ /// <param name="fill">Fill value</param>
+ /// <param name="leaf"><see langword="true"/> if leaf; otherwise <see langword="false"/></param>
+ /// <returns>Allocated block</returns>
+ private IntPtr Allocate<T>(int length, T fill, bool leaf) where T : unmanaged
+ {
+ var size = sizeof(T) * length;
+ var page = (IntPtr)NativeAllocator.Instance.Allocate((uint)size);
+ var span = new Span<T>((void*)page, length);
+
+ span.Fill(fill);
+
+ _pages.Add(page);
+
+ TranslatorEventSource.Log.AddressTableAllocated(size, leaf);
+
+ return page;
+ }
+
+ /// <summary>
+ /// Releases all resources used by the <see cref="AddressTable{TEntry}"/> instance.
+ /// </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+
+ /// <summary>
+ /// Releases all unmanaged and optionally managed resources used by the
+ /// <see cref="AddressTable{TEntry}"/> instance.
+ /// </summary>
+ /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resources</param>
+ protected virtual void Dispose(bool disposing)
+ {
+ if (!_disposed)
+ {
+ foreach (var page in _pages)
+ {
+ Marshal.FreeHGlobal(page);
+ }
+
+ _disposed = true;
+ }
+ }
+
+ /// <summary>
+ /// Frees resources used by the <see cref="AddressTable{TEntry}"/> instance.
+ /// </summary>
+ ~AddressTable()
+ {
+ Dispose(false);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Common/Allocator.cs b/src/ARMeilleure/Common/Allocator.cs
new file mode 100644
index 0000000..6905a61
--- /dev/null
+++ b/src/ARMeilleure/Common/Allocator.cs
@@ -0,0 +1,24 @@
+using System;
+
+namespace ARMeilleure.Common
+{
+ unsafe abstract class Allocator : IDisposable
+ {
+ public T* Allocate<T>(ulong count = 1) where T : unmanaged
+ {
+ return (T*)Allocate(count * (uint)sizeof(T));
+ }
+
+ public abstract void* Allocate(ulong size);
+
+ public abstract void Free(void* block);
+
+ protected virtual void Dispose(bool disposing) { }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Common/ArenaAllocator.cs b/src/ARMeilleure/Common/ArenaAllocator.cs
new file mode 100644
index 0000000..ce8e339
--- /dev/null
+++ b/src/ARMeilleure/Common/ArenaAllocator.cs
@@ -0,0 +1,190 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.Common
+{
+ unsafe sealed class ArenaAllocator : Allocator
+ {
+ private class PageInfo
+ {
+ public byte* Pointer;
+ public byte Unused;
+ public int UnusedCounter;
+ }
+
+ private int _lastReset;
+ private ulong _index;
+ private int _pageIndex;
+ private PageInfo _page;
+ private List<PageInfo> _pages;
+ private readonly ulong _pageSize;
+ private readonly uint _pageCount;
+ private readonly List _extras;
+
+ public ArenaAllocator(uint pageSize, uint pageCount)
+ {
+ _lastReset = Environment.TickCount;
+
+ // Set _index to pageSize so that the first allocation goes through the slow path.
+ _index = pageSize;
+ _pageIndex = -1;
+
+ _page = null;
+ _pages = new List<PageInfo>();
+ _pageSize = pageSize;
+ _pageCount = pageCount;
+
+ _extras = new List<IntPtr>();
+ }
+
+ public Span<T> AllocateSpan<T>(ulong count) where T : unmanaged
+ {
+ return new Span<T>(Allocate<T>(count), (int)count);
+ }
+
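+ // Bump allocation: take the next `size` bytes from the current page and only
+ // fall back to the slow path when the page is exhausted or missing.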
+ public override void* Allocate(ulong size)
+ {
+ if (_index + size <= _pageSize)
+ {
+ byte* result = _page.Pointer + _index;
+
+ _index += size;
+
+ return result;
+ }
+
+ return AllocateSlow(size);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private void* AllocateSlow(ulong size)
+ {
+ if (size > _pageSize)
+ {
+ void* extra = NativeAllocator.Instance.Allocate(size);
+
+ _extras.Add((IntPtr)extra);
+
+ return extra;
+ }
+
+ if (_index + size > _pageSize)
+ {
+ _index = 0;
+ _pageIndex++;
+ }
+
+ if (_pageIndex < _pages.Count)
+ {
+ _page = _pages[_pageIndex];
+ _page.Unused = 0;
+ }
+ else
+ {
+ _page = new PageInfo
+ {
+ Pointer = (byte*)NativeAllocator.Instance.Allocate(_pageSize),
+ };
+
+ _pages.Add(_page);
+ }
+
+ byte* result = _page.Pointer + _index;
+
+ _index += size;
+
+ return result;
+ }
+
+ public override void Free(void* block) { }
+
+ public void Reset()
+ {
+ _index = _pageSize;
+ _pageIndex = -1;
+ _page = null;
+
+ // Free excess pages that were allocated.
+ while (_pages.Count > _pageCount)
+ {
+ NativeAllocator.Instance.Free(_pages[^1].Pointer);
+
+ _pages.RemoveAt(_pages.Count - 1);
+ }
+
+ // Free extra blocks that are not page-sized
+ foreach (IntPtr ptr in _extras)
+ {
+ NativeAllocator.Instance.Free((void*)ptr);
+ }
+
+ _extras.Clear();
+
+ // Free pooled pages that have not been used in a while. Remove pages at the back first, because we try to
+ // keep the pages at the front alive, since they're more likely to be hot and in the d-cache.
+ bool removing = true;
+
+ // If arena is used frequently, keep pages for longer. Otherwise keep pages for a shorter amount of time.
+ int now = Environment.TickCount;
+ int count = (now - _lastReset) switch
+ {
+ >= 5000 => 0,
+ >= 2500 => 50,
+ >= 1000 => 100,
+ >= 10 => 1500,
+ _ => 5000,
+ };
+
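+ // As a rough illustration: if Reset runs about once per 60 FPS frame (~16 ms
+ // apart), a page must go unused for 1500 consecutive resets (around 24 seconds)
+ // before it is freed.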
+ for (int i = _pages.Count - 1; i >= 0; i--)
+ {
+ PageInfo page = _pages[i];
+
+ if (page.Unused == 0)
+ {
+ page.UnusedCounter = 0;
+ }
+
+ page.UnusedCounter += page.Unused;
+ page.Unused = 1;
+
+ // If the page has not been used for `count` resets, remove it.
+ if (removing && page.UnusedCounter >= count)
+ {
+ NativeAllocator.Instance.Free(page.Pointer);
+
+ _pages.RemoveAt(i);
+ }
+ else
+ {
+ removing = false;
+ }
+ }
+
+ _lastReset = now;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (_pages != null)
+ {
+ foreach (PageInfo info in _pages)
+ {
+ NativeAllocator.Instance.Free(info.Pointer);
+ }
+
+ foreach (IntPtr ptr in _extras)
+ {
+ NativeAllocator.Instance.Free((void*)ptr);
+ }
+
+ _pages = null;
+ }
+ }
+
+ ~ArenaAllocator()
+ {
+ Dispose(false);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Common/BitMap.cs b/src/ARMeilleure/Common/BitMap.cs
new file mode 100644
index 0000000..94d47ea
--- /dev/null
+++ b/src/ARMeilleure/Common/BitMap.cs
@@ -0,0 +1,222 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.Common
+{
+ unsafe class BitMap : IEnumerable<int>, IDisposable
+ {
+ private const int IntSize = 64;
+ private const int IntMask = IntSize - 1;
+
+ private int _count;
+ private long* _masks;
+ private readonly Allocator _allocator;
+
+ public BitMap(Allocator allocator)
+ {
+ _allocator = allocator;
+ }
+
+ public BitMap(Allocator allocator, int capacity) : this(allocator)
+ {
+ EnsureCapacity(capacity);
+ }
+
+ public bool Set(int bit)
+ {
+ EnsureCapacity(bit + 1);
+
+ int wordIndex = bit / IntSize;
+ int wordBit = bit & IntMask;
+
+ long wordMask = 1L << wordBit;
+
+ if ((_masks[wordIndex] & wordMask) != 0)
+ {
+ return false;
+ }
+
+ _masks[wordIndex] |= wordMask;
+
+ return true;
+ }
+
+ public void Clear(int bit)
+ {
+ EnsureCapacity(bit + 1);
+
+ int wordIndex = bit / IntSize;
+ int wordBit = bit & IntMask;
+
+ long wordMask = 1L << wordBit;
+
+ _masks[wordIndex] &= ~wordMask;
+ }
+
+ public bool IsSet(int bit)
+ {
+ EnsureCapacity(bit + 1);
+
+ int wordIndex = bit / IntSize;
+ int wordBit = bit & IntMask;
+
+ return (_masks[wordIndex] & (1L << wordBit)) != 0;
+ }
+
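+ // Returns the index of the first clear bit, e.g. 3 when the first mask word is
+ // 0b0111; returns the current bit capacity when every bit is set.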
+ public int FindFirstUnset()
+ {
+ for (int index = 0; index < _count; index++)
+ {
+ long mask = _masks[index];
+
+ if (mask != -1L)
+ {
+ return BitOperations.TrailingZeroCount(~mask) + index * IntSize;
+ }
+ }
+
+ return _count * IntSize;
+ }
+
+ public bool Set(BitMap map)
+ {
+ EnsureCapacity(map._count * IntSize);
+
+ bool modified = false;
+
+ for (int index = 0; index < _count; index++)
+ {
+ long newValue = _masks[index] | map._masks[index];
+
+ if (_masks[index] != newValue)
+ {
+ _masks[index] = newValue;
+
+ modified = true;
+ }
+ }
+
+ return modified;
+ }
+
+ public bool Clear(BitMap map)
+ {
+ EnsureCapacity(map._count * IntSize);
+
+ bool modified = false;
+
+ for (int index = 0; index < _count; index++)
+ {
+ long newValue = _masks[index] & ~map._masks[index];
+
+ if (_masks[index] != newValue)
+ {
+ _masks[index] = newValue;
+
+ modified = true;
+ }
+ }
+
+ return modified;
+ }
+
+ private void EnsureCapacity(int size)
+ {
+ int count = (size + IntMask) / IntSize;
+
+ if (count > _count)
+ {
+ var oldMask = _masks;
+ var oldSpan = new Span<long>(_masks, _count);
+
+ _masks = _allocator.Allocate<long>((uint)count);
+ _count = count;
+
+ var newSpan = new Span<long>(_masks, _count);
+
+ oldSpan.CopyTo(newSpan);
+ newSpan[oldSpan.Length..].Clear();
+
+ _allocator.Free(oldMask);
+ }
+ }
+
+ public void Dispose()
+ {
+ if (_masks != null)
+ {
+ _allocator.Free(_masks);
+
+ _masks = null;
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ IEnumerator<int> IEnumerable<int>.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ public Enumerator GetEnumerator()
+ {
+ return new Enumerator(this);
+ }
+
+ public struct Enumerator : IEnumerator<int>
+ {
+ private long _index;
+ private long _mask;
+ private int _bit;
+ private readonly BitMap _map;
+
+ public readonly int Current => (int)_index * IntSize + _bit;
+ readonly object IEnumerator.Current => Current;
+
+ public Enumerator(BitMap map)
+ {
+ _index = -1;
+ _mask = 0;
+ _bit = 0;
+ _map = map;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool MoveNext()
+ {
+ if (_mask != 0)
+ {
+ _mask &= ~(1L << _bit);
+ }
+
+ // Manually hoist these loads, because RyuJIT does not.
+ long count = (uint)_map._count;
+ long* masks = _map._masks;
+
+ while (_mask == 0)
+ {
+ if (++_index >= count)
+ {
+ return false;
+ }
+
+ _mask = masks[_index];
+ }
+
+ _bit = BitOperations.TrailingZeroCount(_mask);
+
+ return true;
+ }
+
+ public readonly void Reset() { }
+
+ public readonly void Dispose() { }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Common/BitUtils.cs b/src/ARMeilleure/Common/BitUtils.cs
new file mode 100644
index 0000000..e7697ff
--- /dev/null
+++ b/src/ARMeilleure/Common/BitUtils.cs
@@ -0,0 +1,57 @@
+using System;
+using System.Numerics;
+
+namespace ARMeilleure.Common
+{
+ static class BitUtils
+ {
+ private static ReadOnlySpan<sbyte> HbsNibbleLut => new sbyte[] { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
+
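+ // Produces a mask with the lowest `bits` bits set, e.g. FillWithOnes(3) == 0b111.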
+ public static long FillWithOnes(int bits)
+ {
+ return bits == 64 ? -1L : (1L << bits) - 1;
+ }
+
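+ // Index of the most significant set bit, e.g. HighestBitSet(0b100) == 2; returns -1 for 0.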
+ public static int HighestBitSet(int value)
+ {
+ return 31 - BitOperations.LeadingZeroCount((uint)value);
+ }
+
+ public static int HighestBitSetNibble(int value)
+ {
+ return HbsNibbleLut[value];
+ }
+
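+ // Tiles the low `size` bits across all 64 bits, e.g. Replicate(0b01, 2) == 0x5555555555555555.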
+ public static long Replicate(long bits, int size)
+ {
+ long output = 0;
+
+ for (int bit = 0; bit < 64; bit += size)
+ {
+ output |= bits << bit;
+ }
+
+ return output;
+ }
+
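+ // Rotates within the low `size` bits; when size is smaller than the full word,
+ // bits shifted past `size` land above it and are masked off by callers where
+ // needed (see DecoderHelper.DecodeBitMask).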
+ public static int RotateRight(int bits, int shift, int size)
+ {
+ return (int)RotateRight((uint)bits, shift, size);
+ }
+
+ public static uint RotateRight(uint bits, int shift, int size)
+ {
+ return (bits >> shift) | (bits << (size - shift));
+ }
+
+ public static long RotateRight(long bits, int shift, int size)
+ {
+ return (long)RotateRight((ulong)bits, shift, size);
+ }
+
+ public static ulong RotateRight(ulong bits, int shift, int size)
+ {
+ return (bits >> shift) | (bits << (size - shift));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Common/Counter.cs b/src/ARMeilleure/Common/Counter.cs
new file mode 100644
index 0000000..6db9561
--- /dev/null
+++ b/src/ARMeilleure/Common/Counter.cs
@@ -0,0 +1,98 @@
+using System;
+
+namespace ARMeilleure.Common
+{
+ /// <summary>
+ /// Represents a numeric counter which can be used for instrumentation of compiled code.
+ /// </summary>
+ /// <typeparam name="T">Type of the counter</typeparam>
+ class Counter<T> : IDisposable where T : unmanaged
+ {
+ private bool _disposed;
+ /// <summary>
+ /// Index in the <see cref="EntryTable{T}"/>
+ /// </summary>
+ private readonly int _index;
+ private readonly EntryTable<T> _countTable;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Counter{T}"/> class from the specified
+ /// <see cref="EntryTable{T}"/> instance and index.
+ /// </summary>
+ /// <param name="countTable"><see cref="EntryTable{T}"/> instance</param>
+ /// <exception cref="ArgumentNullException"><paramref name="countTable"/> is <see langword="null"/></exception>
+ /// <exception cref="ArgumentException"><typeparamref name="T"/> is unsupported</exception>
+ public Counter(EntryTable<T> countTable)
+ {
+ if (typeof(T) != typeof(byte) && typeof(T) != typeof(sbyte) &&
+ typeof(T) != typeof(short) && typeof(T) != typeof(ushort) &&
+ typeof(T) != typeof(int) && typeof(T) != typeof(uint) &&
+ typeof(T) != typeof(long) && typeof(T) != typeof(ulong) &&
+ typeof(T) != typeof(nint) && typeof(T) != typeof(nuint) &&
+ typeof(T) != typeof(float) && typeof(T) != typeof(double))
+ {
+ throw new ArgumentException("Counter does not support the specified type.");
+ }
+
+ _countTable = countTable ?? throw new ArgumentNullException(nameof(countTable));
+ _index = countTable.Allocate();
+ }
+
+ /// <summary>
+ /// Gets a reference to the value of the counter.
+ /// </summary>
+ /// <exception cref="ObjectDisposedException"><see cref="Counter{T}"/> instance was disposed</exception>
+ /// <remarks>
+ /// This can refer to freed memory if the owning <see cref="EntryTable{T}"/> is disposed.
+ /// </remarks>
+ public ref T Value
+ {
+ get
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ return ref _countTable.GetValue(_index);
+ }
+ }
+
+ /// <summary>
+ /// Releases all resources used by the <see cref="Counter{T}"/> instance.
+ /// </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+
+ /// <summary>
+ /// Releases all unmanaged and optionally managed resources used by the <see cref="Counter{T}"/> instance.
+ /// </summary>
+ /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resources</param>
+ protected virtual void Dispose(bool disposing)
+ {
+ if (!_disposed)
+ {
+ try
+ {
+ // The index into the EntryTable is essentially an unmanaged resource since we allocate and free the
+ // resource ourselves.
+ _countTable.Free(_index);
+ }
+ catch (ObjectDisposedException)
+ {
+ // Can happen because _countTable may be disposed before the Counter instance.
+ }
+
+ _disposed = true;
+ }
+ }
+
+ /// <summary>
+ /// Frees resources used by the <see cref="Counter{T}"/> instance.
+ /// </summary>
+ ~Counter()
+ {
+ Dispose(false);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Common/EntryTable.cs b/src/ARMeilleure/Common/EntryTable.cs
new file mode 100644
index 0000000..625e3f7
--- /dev/null
+++ b/src/ARMeilleure/Common/EntryTable.cs
@@ -0,0 +1,188 @@
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+
+namespace ARMeilleure.Common
+{
+ /// <summary>
+ /// Represents an expandable table of the type <typeparamref name="TEntry"/>, whose entries will remain at the same
+ /// address throughout the table's lifetime.
+ /// </summary>
+ /// <typeparam name="TEntry">Type of the entry in the table</typeparam>
+ class EntryTable<TEntry> : IDisposable where TEntry : unmanaged
+ {
+ private bool _disposed;
+ private int _freeHint;
+ private readonly int _pageCapacity; // Number of entries per page.
+ private readonly int _pageLogCapacity;
+ private readonly Dictionary<int, IntPtr> _pages;
+ private readonly BitMap _allocated;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="EntryTable{TEntry}"/> class with the desired page size in
+ /// bytes.
+ /// </summary>
+ /// <param name="pageSize">Desired page size in bytes</param>
+ /// <exception cref="ArgumentOutOfRangeException"><paramref name="pageSize"/> is less than 0</exception>
+ /// <exception cref="ArgumentException"><typeparamref name="TEntry"/>'s size is zero</exception>
+ /// <remarks>
+ /// The actual page size may be smaller or larger depending on the size of <typeparamref name="TEntry"/>.
+ /// </remarks>
+ public unsafe EntryTable(int pageSize = 4096)
+ {
+ if (pageSize < 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(pageSize), "Page size cannot be negative.");
+ }
+
+ if (sizeof(TEntry) == 0)
+ {
+ throw new ArgumentException("Size of TEntry cannot be zero.");
+ }
+
+ _allocated = new BitMap(NativeAllocator.Instance);
+ _pages = new Dictionary<int, IntPtr>();
+ _pageLogCapacity = BitOperations.Log2((uint)(pageSize / sizeof(TEntry)));
+ _pageCapacity = 1 << _pageLogCapacity;
+ }
+
+ /// <summary>
+ /// Allocates an entry in the <see cref="EntryTable{TEntry}"/>.
+ /// </summary>
+ /// <returns>Index of entry allocated in the table</returns>
+ /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
+ public int Allocate()
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ lock (_allocated)
+ {
+ if (_allocated.IsSet(_freeHint))
+ {
+ _freeHint = _allocated.FindFirstUnset();
+ }
+
+ int index = _freeHint++;
+ var page = GetPage(index);
+
+ _allocated.Set(index);
+
+ GetValue(page, index) = default;
+
+ return index;
+ }
+ }
+
+ /// <summary>
+ /// Frees the entry at the specified <paramref name="index"/>.
+ /// </summary>
+ /// <param name="index">Index of entry to free</param>
+ /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
+ public void Free(int index)
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ lock (_allocated)
+ {
+ if (_allocated.IsSet(index))
+ {
+ _allocated.Clear(index);
+
+ _freeHint = index;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Gets a reference to the entry at the specified allocated <paramref name="index"/>.
+ /// </summary>
+ /// <param name="index">Index of the entry</param>
+ /// <returns>Reference to the entry at the specified <paramref name="index"/></returns>
+ /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
+ /// <exception cref="ArgumentException">Entry at <paramref name="index"/> is not allocated</exception>
+ public ref TEntry GetValue(int index)
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+
+ lock (_allocated)
+ {
+ if (!_allocated.IsSet(index))
+ {
+ throw new ArgumentException("Entry at the specified index was not allocated", nameof(index));
+ }
+
+ var page = GetPage(index);
+
+ return ref GetValue(page, index);
+ }
+ }
+
+ /// <summary>
+ /// Gets a reference to the entry at <paramref name="index"/> using the specified <paramref name="page"/>.
+ /// </summary>
+ /// <param name="page">Page to use</param>
+ /// <param name="index">Index to use</param>
+ /// <returns>Reference to the entry</returns>
+ private ref TEntry GetValue(Span<TEntry> page, int index)
+ {
+ return ref page[index & (_pageCapacity - 1)];
+ }
+
+ /// <summary>
+ /// Gets the page for the specified <paramref name="index"/>.
+ /// </summary>
+ /// <param name="index">Index to use</param>
+ /// <returns>Page for the specified <paramref name="index"/></returns>
+ private unsafe Span<TEntry> GetPage(int index)
+ {
+ var pageIndex = (int)((uint)(index & ~(_pageCapacity - 1)) >> _pageLogCapacity);
+
+ if (!_pages.TryGetValue(pageIndex, out IntPtr page))
+ {
+ page = (IntPtr)NativeAllocator.Instance.Allocate((uint)sizeof(TEntry) * (uint)_pageCapacity);
+
+ _pages.Add(pageIndex, page);
+ }
+
+ return new Span<TEntry>((void*)page, _pageCapacity);
+ }
+
+ /// <summary>
+ /// Releases all resources used by the <see cref="EntryTable{TEntry}"/> instance.
+ /// </summary>
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+
+ /// <summary>
+ /// Releases all unmanaged and optionally managed resources used by the
+ /// <see cref="EntryTable{TEntry}"/> instance.
+ /// </summary>
+ /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resources</param>
+ protected unsafe virtual void Dispose(bool disposing)
+ {
+ if (!_disposed)
+ {
+ _allocated.Dispose();
+
+ foreach (var page in _pages.Values)
+ {
+ NativeAllocator.Instance.Free((void*)page);
+ }
+
+ _disposed = true;
+ }
+ }
+
+ /// <summary>
+ /// Frees resources used by the <see cref="EntryTable{TEntry}"/> instance.
+ /// </summary>
+ ~EntryTable()
+ {
+ Dispose(false);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Common/EnumUtils.cs b/src/ARMeilleure/Common/EnumUtils.cs
new file mode 100644
index 0000000..2a4aa64
--- /dev/null
+++ b/src/ARMeilleure/Common/EnumUtils.cs
@@ -0,0 +1,12 @@
+using System;
+
+namespace ARMeilleure.Common
+{
+ static class EnumUtils
+ {
+ public static int GetCount(Type enumType)
+ {
+ return Enum.GetNames(enumType).Length;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Common/NativeAllocator.cs b/src/ARMeilleure/Common/NativeAllocator.cs
new file mode 100644
index 0000000..93c48ad
--- /dev/null
+++ b/src/ARMeilleure/Common/NativeAllocator.cs
@@ -0,0 +1,27 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Common
+{
+ unsafe sealed class NativeAllocator : Allocator
+ {
+ public static NativeAllocator Instance { get; } = new();
+
+ public override void* Allocate(ulong size)
+ {
+ void* result = (void*)Marshal.AllocHGlobal((IntPtr)size);
+
+ if (result == null)
+ {
+ throw new OutOfMemoryException();
+ }
+
+ return result;
+ }
+
+ public override void Free(void* block)
+ {
+ Marshal.FreeHGlobal((IntPtr)block);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/Block.cs b/src/ARMeilleure/Decoders/Block.cs
new file mode 100644
index 0000000..bb88170
--- /dev/null
+++ b/src/ARMeilleure/Decoders/Block.cs
@@ -0,0 +1,101 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders
+{
+ class Block
+ {
+ public ulong Address { get; set; }
+ public ulong EndAddress { get; set; }
+
+ public Block Next { get; set; }
+ public Block Branch { get; set; }
+
+ public bool Exit { get; set; }
+
+ public List<OpCode> OpCodes { get; }
+
+ public Block()
+ {
+ OpCodes = new List<OpCode>();
+ }
+
+ public Block(ulong address) : this()
+ {
+ Address = address;
+ }
+
+ public void Split(Block rightBlock)
+ {
+ int splitIndex = BinarySearch(OpCodes, rightBlock.Address);
+
+ if (OpCodes[splitIndex].Address < rightBlock.Address)
+ {
+ splitIndex++;
+ }
+
+ int splitCount = OpCodes.Count - splitIndex;
+
+ if (splitCount <= 0)
+ {
+ throw new ArgumentException("Can't split at right block address.");
+ }
+
+ rightBlock.EndAddress = EndAddress;
+
+ rightBlock.Next = Next;
+ rightBlock.Branch = Branch;
+
+ rightBlock.OpCodes.AddRange(OpCodes.GetRange(splitIndex, splitCount));
+
+ EndAddress = rightBlock.Address;
+
+ Next = rightBlock;
+ Branch = null;
+
+ OpCodes.RemoveRange(splitIndex, splitCount);
+ }
+
+ private static int BinarySearch(List<OpCode> opCodes, ulong address)
+ {
+ int left = 0;
+ int middle = 0;
+ int right = opCodes.Count - 1;
+
+ while (left <= right)
+ {
+ int size = right - left;
+
+ middle = left + (size >> 1);
+
+ OpCode opCode = opCodes[middle];
+
+ if (address == (ulong)opCode.Address)
+ {
+ break;
+ }
+
+ if (address < (ulong)opCode.Address)
+ {
+ right = middle - 1;
+ }
+ else
+ {
+ left = middle + 1;
+ }
+ }
+
+ return middle;
+ }
+
+ public OpCode GetLastOp()
+ {
+ if (OpCodes.Count > 0)
+ {
+ return OpCodes[^1];
+ }
+
+ return null;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/Condition.cs b/src/ARMeilleure/Decoders/Condition.cs
new file mode 100644
index 0000000..961825a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/Condition.cs
@@ -0,0 +1,32 @@
+namespace ARMeilleure.Decoders
+{
+ enum Condition
+ {
+ Eq = 0,
+ Ne = 1,
+ GeUn = 2,
+ LtUn = 3,
+ Mi = 4,
+ Pl = 5,
+ Vs = 6,
+ Vc = 7,
+ GtUn = 8,
+ LeUn = 9,
+ Ge = 10,
+ Lt = 11,
+ Gt = 12,
+ Le = 13,
+ Al = 14,
+ Nv = 15,
+ }
+
+ static class ConditionExtensions
+ {
+ public static Condition Invert(this Condition cond)
+ {
+ // Bit 0 of all conditions is basically a negation bit, so
+ // inverting this bit has the effect of inverting the condition.
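+ // For example, Eq (0b0000) inverts to Ne (0b0001), and Lt (0b1011) to Ge (0b1010).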
+ return (Condition)((int)cond ^ 1);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/DataOp.cs b/src/ARMeilleure/Decoders/DataOp.cs
new file mode 100644
index 0000000..f99fd5e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/DataOp.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ enum DataOp
+ {
+ Adr = 0,
+ Arithmetic = 1,
+ Logical = 2,
+ BitField = 3,
+ }
+}
diff --git a/src/ARMeilleure/Decoders/Decoder.cs b/src/ARMeilleure/Decoders/Decoder.cs
new file mode 100644
index 0000000..66d2869
--- /dev/null
+++ b/src/ARMeilleure/Decoders/Decoder.cs
@@ -0,0 +1,393 @@
+using ARMeilleure.Decoders.Optimizations;
+using ARMeilleure.Instructions;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace ARMeilleure.Decoders
+{
+ static class Decoder
+ {
+ // We define a limit on the number of instructions that a function may have.
+ // This prevents functions from becoming too large, which would take too long
+ // to compile and use too much memory.
+ private const int MaxInstsPerFunction = 2500;
+
+ // For lower code quality translation, we set a lower limit since we're blocking execution.
+ private const int MaxInstsPerFunctionLowCq = 500;
+
+ public static Block[] Decode(IMemoryManager memory, ulong address, ExecutionMode mode, bool highCq, DecoderMode dMode)
+ {
+ List<Block> blocks = new();
+
+ Queue<Block> workQueue = new();
+
+ Dictionary<ulong, Block> visited = new();
+
+ Debug.Assert(MaxInstsPerFunctionLowCq <= MaxInstsPerFunction);
+
+ int opsCount = 0;
+
+ int instructionLimit = highCq ? MaxInstsPerFunction : MaxInstsPerFunctionLowCq;
+
+ Block GetBlock(ulong blkAddress)
+ {
+ if (!visited.TryGetValue(blkAddress, out Block block))
+ {
+ block = new Block(blkAddress);
+
+ if ((dMode != DecoderMode.MultipleBlocks && visited.Count >= 1) ||
+ opsCount > instructionLimit ||
+ (visited.Count > 0 && !memory.IsMapped(blkAddress)))
+ {
+ block.Exit = true;
+ block.EndAddress = blkAddress;
+ }
+
+ workQueue.Enqueue(block);
+
+ visited.Add(blkAddress, block);
+ }
+
+ return block;
+ }
+
+ GetBlock(address);
+
+ while (workQueue.TryDequeue(out Block currBlock))
+ {
+ // Check if the current block is inside another block.
+ if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ if (nBlock.Address == currBlock.Address)
+ {
+ throw new InvalidOperationException("Found duplicate block address on the list.");
+ }
+
+ currBlock.Exit = false;
+
+ nBlock.Split(currBlock);
+
+ blocks.Insert(nBlkIndex + 1, currBlock);
+
+ continue;
+ }
+
+ if (!currBlock.Exit)
+ {
+ // If we have a block after the current one, set the limit address.
+ ulong limitAddress = ulong.MaxValue;
+
+ if (nBlkIndex != blocks.Count)
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ int nextIndex = nBlkIndex + 1;
+
+ if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
+ {
+ limitAddress = blocks[nextIndex].Address;
+ }
+ else if (nBlock.Address > currBlock.Address)
+ {
+ limitAddress = blocks[nBlkIndex].Address;
+ }
+ }
+
+ if (dMode == DecoderMode.SingleInstruction)
+ {
+ // Only read at most one instruction
+ limitAddress = currBlock.Address + 1;
+ }
+
+ FillBlock(memory, mode, currBlock, limitAddress);
+
+ opsCount += currBlock.OpCodes.Count;
+
+ if (currBlock.OpCodes.Count != 0)
+ {
+ // Set child blocks. "Branch" is the block the branch instruction
+ // points to (when taken), "Next" is the block at the next address,
+ // executed when the branch is not taken. For Unconditional Branches
+ // (except BL/BLR that are sub calls) or end of executable, Next is null.
+ OpCode lastOp = currBlock.GetLastOp();
+
+ bool isCall = IsCall(lastOp);
+
+ if (lastOp is IOpCodeBImm op && !isCall)
+ {
+ currBlock.Branch = GetBlock((ulong)op.Immediate);
+ }
+
+ if (isCall || !(IsUnconditionalBranch(lastOp) || IsTrap(lastOp)))
+ {
+ currBlock.Next = GetBlock(currBlock.EndAddress);
+ }
+ }
+ }
+
+ // Insert the new block on the list (sorted by address).
+ if (blocks.Count != 0)
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
+ }
+ else
+ {
+ blocks.Add(currBlock);
+ }
+ }
+
+ if (blocks.Count == 1 && blocks[0].OpCodes.Count == 0)
+ {
+ Debug.Assert(blocks[0].Exit);
+ Debug.Assert(blocks[0].Address == blocks[0].EndAddress);
+
+ throw new InvalidOperationException($"Decoded a single empty exit block. Entry point = 0x{address:X}.");
+ }
+
+ if (dMode == DecoderMode.MultipleBlocks)
+ {
+ return TailCallRemover.RunPass(address, blocks);
+ }
+ else
+ {
+ return blocks.ToArray();
+ }
+ }
+
+ public static bool BinarySearch(List<Block> blocks, ulong address, out int index)
+ {
+ index = 0;
+
+ int left = 0;
+ int right = blocks.Count - 1;
+
+ while (left <= right)
+ {
+ int size = right - left;
+
+ int middle = left + (size >> 1);
+
+ Block block = blocks[middle];
+
+ index = middle;
+
+ if (address >= block.Address && address < block.EndAddress)
+ {
+ return true;
+ }
+
+ if (address < block.Address)
+ {
+ right = middle - 1;
+ }
+ else
+ {
+ left = middle + 1;
+ }
+ }
+
+ return false;
+ }
+
+ private static void FillBlock(
+ IMemoryManager memory,
+ ExecutionMode mode,
+ Block block,
+ ulong limitAddress)
+ {
+ ulong address = block.Address;
+ int itBlockSize = 0;
+
+ OpCode opCode;
+
+ do
+ {
+ if (address >= limitAddress && itBlockSize == 0)
+ {
+ break;
+ }
+
+ opCode = DecodeOpCode(memory, address, mode);
+
+ block.OpCodes.Add(opCode);
+
+ address += (ulong)opCode.OpCodeSizeInBytes;
+
+ if (opCode is OpCodeT16IfThen it)
+ {
+ itBlockSize = it.IfThenBlockSize;
+ }
+ else if (itBlockSize > 0)
+ {
+ itBlockSize--;
+ }
+ }
+ while (!(IsBranch(opCode) || IsException(opCode)));
+
+ block.EndAddress = address;
+ }
+
+ private static bool IsBranch(OpCode opCode)
+ {
+ return opCode is OpCodeBImm ||
+ opCode is OpCodeBReg || IsAarch32Branch(opCode);
+ }
+
+ private static bool IsUnconditionalBranch(OpCode opCode)
+ {
+ return opCode is OpCodeBImmAl ||
+ opCode is OpCodeBReg || IsAarch32UnconditionalBranch(opCode);
+ }
+
+ private static bool IsAarch32UnconditionalBranch(OpCode opCode)
+ {
+ if (opCode is not OpCode32 op)
+ {
+ return false;
+ }
+
+ // Compare and branch instructions are always conditional.
+ if (opCode.Instruction.Name == InstName.Cbz ||
+ opCode.Instruction.Name == InstName.Cbnz)
+ {
+ return false;
+ }
+
+ // Note: On ARM32, most instructions have conditional execution,
+ // so there's no "Always" (unconditional) branch like on ARM64.
+ // We need to check if the condition is "Always" instead.
+ return IsAarch32Branch(op) && op.Cond >= Condition.Al;
+ }
+
+ private static bool IsAarch32Branch(OpCode opCode)
+ {
+ // Note: On ARM32, most ALU operations can write to R15 (PC),
+ // so we must potentially consider such operations as branches as well.
+ if (opCode is IOpCode32Alu opAlu && opAlu.Rd == RegisterAlias.Aarch32Pc)
+ {
+ if (opCode is OpCodeT32)
+ {
+ return opCode.Instruction.Name != InstName.Tst && opCode.Instruction.Name != InstName.Teq &&
+ opCode.Instruction.Name != InstName.Cmp && opCode.Instruction.Name != InstName.Cmn;
+ }
+ return true;
+ }
+
+ // Same thing for memory operations. We have the cases where PC is a target
+ // register (Rt == 15 or (mask & (1 << 15)) != 0), and cases where there is
+ // a write back to PC (wback == true && Rn == 15), however the latter may
+ // be "undefined" depending on the CPU, so compilers should not produce that.
+ if (opCode is IOpCode32Mem || opCode is IOpCode32MemMult)
+ {
+ int rt, rn;
+
+ bool wBack, isLoad;
+
+ if (opCode is IOpCode32Mem opMem)
+ {
+ rt = opMem.Rt;
+ rn = opMem.Rn;
+ wBack = opMem.WBack;
+ isLoad = opMem.IsLoad;
+
+ // For the dual load, we also need to take into account the
+ // case where Rt2 == 15 (PC).
+ if (rt == 14 && opMem.Instruction.Name == InstName.Ldrd)
+ {
+ rt = RegisterAlias.Aarch32Pc;
+ }
+ }
+ else if (opCode is IOpCode32MemMult opMemMult)
+ {
+ const int PCMask = 1 << RegisterAlias.Aarch32Pc;
+
+ rt = (opMemMult.RegisterMask & PCMask) != 0 ? RegisterAlias.Aarch32Pc : 0;
+ rn = opMemMult.Rn;
+ wBack = opMemMult.PostOffset != 0;
+ isLoad = opMemMult.IsLoad;
+ }
+ else
+ {
+ throw new NotImplementedException($"The type \"{opCode.GetType().Name}\" is not implemented on the decoder.");
+ }
+
+ if ((rt == RegisterAlias.Aarch32Pc && isLoad) ||
+ (rn == RegisterAlias.Aarch32Pc && wBack))
+ {
+ return true;
+ }
+ }
+
+ // Explicit branch instructions.
+ return opCode is IOpCode32BImm ||
+ opCode is IOpCode32BReg;
+ }
+
+ private static bool IsCall(OpCode opCode)
+ {
+ return opCode.Instruction.Name == InstName.Bl ||
+ opCode.Instruction.Name == InstName.Blr ||
+ opCode.Instruction.Name == InstName.Blx;
+ }
+
+ private static bool IsException(OpCode opCode)
+ {
+ return IsTrap(opCode) || opCode.Instruction.Name == InstName.Svc;
+ }
+
+ private static bool IsTrap(OpCode opCode)
+ {
+ return opCode.Instruction.Name == InstName.Brk ||
+ opCode.Instruction.Name == InstName.Trap ||
+ opCode.Instruction.Name == InstName.Und;
+ }
+
+ public static OpCode DecodeOpCode(IMemoryManager memory, ulong address, ExecutionMode mode)
+ {
+ int opCode = memory.Read<int>(address);
+
+ InstDescriptor inst;
+
+ OpCodeTable.MakeOp makeOp;
+
+ if (mode == ExecutionMode.Aarch64)
+ {
+ (inst, makeOp) = OpCodeTable.GetInstA64(opCode);
+ }
+ else
+ {
+ if (mode == ExecutionMode.Aarch32Arm)
+ {
+ (inst, makeOp) = OpCodeTable.GetInstA32(opCode);
+ }
+ else /* if (mode == ExecutionMode.Aarch32Thumb) */
+ {
+ (inst, makeOp) = OpCodeTable.GetInstT32(opCode);
+ }
+ }
+
+ if (makeOp != null)
+ {
+ return makeOp(inst, address, opCode);
+ }
+ else
+ {
+ if (mode == ExecutionMode.Aarch32Thumb)
+ {
+ return new OpCodeT16(inst, address, opCode);
+ }
+ else
+ {
+ return new OpCode(inst, address, opCode);
+ }
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/DecoderHelper.cs b/src/ARMeilleure/Decoders/DecoderHelper.cs
new file mode 100644
index 0000000..35e5739
--- /dev/null
+++ b/src/ARMeilleure/Decoders/DecoderHelper.cs
@@ -0,0 +1,167 @@
+using ARMeilleure.Common;
+
+namespace ARMeilleure.Decoders
+{
+ static class DecoderHelper
+ {
+ static DecoderHelper()
+ {
+ Imm8ToFP32Table = BuildImm8ToFP32Table();
+ Imm8ToFP64Table = BuildImm8ToFP64Table();
+ }
+
+ public static readonly uint[] Imm8ToFP32Table;
+ public static readonly ulong[] Imm8ToFP64Table;
+
+ private static uint[] BuildImm8ToFP32Table()
+ {
+ uint[] tbl = new uint[256];
+
+ for (int idx = 0; idx < tbl.Length; idx++)
+ {
+ tbl[idx] = ExpandImm8ToFP32((uint)idx);
+ }
+
+ return tbl;
+ }
+
+ private static ulong[] BuildImm8ToFP64Table()
+ {
+ ulong[] tbl = new ulong[256];
+
+ for (int idx = 0; idx < tbl.Length; idx++)
+ {
+ tbl[idx] = ExpandImm8ToFP64((ulong)idx);
+ }
+
+ return tbl;
+ }
+
+ // abcdefgh -> aBbbbbbc defgh000 00000000 00000000 (B = ~b)
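+ // For example, 0x70 expands to 0x3F800000, i.e. 1.0f.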
+ private static uint ExpandImm8ToFP32(uint imm)
+ {
+ static uint MoveBit(uint bits, int from, int to)
+ {
+ return ((bits >> from) & 1U) << to;
+ }
+
+ return MoveBit(imm, 7, 31) | MoveBit(~imm, 6, 30) |
+ MoveBit(imm, 6, 29) | MoveBit(imm, 6, 28) |
+ MoveBit(imm, 6, 27) | MoveBit(imm, 6, 26) |
+ MoveBit(imm, 6, 25) | MoveBit(imm, 5, 24) |
+ MoveBit(imm, 4, 23) | MoveBit(imm, 3, 22) |
+ MoveBit(imm, 2, 21) | MoveBit(imm, 1, 20) |
+ MoveBit(imm, 0, 19);
+ }
+
+ // abcdefgh -> aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000 (B = ~b)
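+ // For example, 0x70 expands to 0x3FF0000000000000, i.e. 1.0.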
+ private static ulong ExpandImm8ToFP64(ulong imm)
+ {
+ static ulong MoveBit(ulong bits, int from, int to)
+ {
+ return ((bits >> from) & 1UL) << to;
+ }
+
+ return MoveBit(imm, 7, 63) | MoveBit(~imm, 6, 62) |
+ MoveBit(imm, 6, 61) | MoveBit(imm, 6, 60) |
+ MoveBit(imm, 6, 59) | MoveBit(imm, 6, 58) |
+ MoveBit(imm, 6, 57) | MoveBit(imm, 6, 56) |
+ MoveBit(imm, 6, 55) | MoveBit(imm, 6, 54) |
+ MoveBit(imm, 5, 53) | MoveBit(imm, 4, 52) |
+ MoveBit(imm, 3, 51) | MoveBit(imm, 2, 50) |
+ MoveBit(imm, 1, 49) | MoveBit(imm, 0, 48);
+ }
+
+ public struct BitMask
+ {
+ public long WMask;
+ public long TMask;
+ public int Pos;
+ public int Shift;
+ public bool IsUndefined;
+
+ public static BitMask Invalid => new() { IsUndefined = true };
+ }
+
+ public static BitMask DecodeBitMask(int opCode, bool immediate)
+ {
+ int immS = (opCode >> 10) & 0x3f;
+ int immR = (opCode >> 16) & 0x3f;
+
+ int n = (opCode >> 22) & 1;
+ int sf = (opCode >> 31) & 1;
+
+ int length = BitUtils.HighestBitSet((~immS & 0x3f) | (n << 6));
+
+ if (length < 1 || (sf == 0 && n != 0))
+ {
+ return BitMask.Invalid;
+ }
+
+ int size = 1 << length;
+
+ int levels = size - 1;
+
+ int s = immS & levels;
+ int r = immR & levels;
+
+ if (immediate && s == levels)
+ {
+ return BitMask.Invalid;
+ }
+
+ long wMask = BitUtils.FillWithOnes(s + 1);
+ long tMask = BitUtils.FillWithOnes(((s - r) & levels) + 1);
+
+ if (r > 0)
+ {
+ wMask = BitUtils.RotateRight(wMask, r, size);
+ wMask &= BitUtils.FillWithOnes(size);
+ }
+
+ return new BitMask()
+ {
+ WMask = BitUtils.Replicate(wMask, size),
+ TMask = BitUtils.Replicate(tMask, size),
+
+ Pos = immS,
+ Shift = immR,
+ };
+ }
+
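+ // Sign-extends the 24-bit immediate in bits 0..23 and scales it by 4
+ // (a word-aligned branch offset).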
+ public static long DecodeImm24_2(int opCode)
+ {
+ return ((long)opCode << 40) >> 38;
+ }
+
+ public static long DecodeImm26_2(int opCode)
+ {
+ return ((long)opCode << 38) >> 36;
+ }
+
+ public static long DecodeImmS19_2(int opCode)
+ {
+ return (((long)opCode << 40) >> 43) & ~3;
+ }
+
+ public static long DecodeImmS14_2(int opCode)
+ {
+ return (((long)opCode << 45) >> 48) & ~3;
+ }
+
+ public static bool VectorArgumentsInvalid(bool q, params int[] args)
+ {
+ if (q)
+ {
+ for (int i = 0; i < args.Length; i++)
+ {
+ if ((args[i] & 1) == 1)
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/DecoderMode.cs b/src/ARMeilleure/Decoders/DecoderMode.cs
new file mode 100644
index 0000000..708d5c8
--- /dev/null
+++ b/src/ARMeilleure/Decoders/DecoderMode.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+ enum DecoderMode
+ {
+ MultipleBlocks,
+ SingleBlock,
+ SingleInstruction,
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode.cs b/src/ARMeilleure/Decoders/IOpCode.cs
new file mode 100644
index 0000000..9d5e3bf
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode.cs
@@ -0,0 +1,17 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode
+ {
+ ulong Address { get; }
+
+ InstDescriptor Instruction { get; }
+
+ RegisterSize RegisterSize { get; }
+
+ int GetBitsCount();
+
+ OperandType GetOperandType();
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32.cs b/src/ARMeilleure/Decoders/IOpCode32.cs
new file mode 100644
index 0000000..578925d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32 : IOpCode
+ {
+ Condition Cond { get; }
+
+ uint GetPc();
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32Adr.cs b/src/ARMeilleure/Decoders/IOpCode32Adr.cs
new file mode 100644
index 0000000..40a4f52
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32Adr.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Adr
+ {
+ int Rd { get; }
+
+ int Immediate { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32Alu.cs b/src/ARMeilleure/Decoders/IOpCode32Alu.cs
new file mode 100644
index 0000000..a85ef44
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32Alu.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Alu : IOpCode32, IOpCode32HasSetFlags
+ {
+ int Rd { get; }
+ int Rn { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluBf.cs b/src/ARMeilleure/Decoders/IOpCode32AluBf.cs
new file mode 100644
index 0000000..d1fe590
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluBf.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluBf
+ {
+ int Rd { get; }
+ int Rn { get; }
+
+ int Msb { get; }
+ int Lsb { get; }
+
+ int SourceMask => (int)(0xFFFFFFFF >> (31 - Msb));
+ int DestMask => SourceMask & (int)(0xFFFFFFFF << Lsb);
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluImm.cs b/src/ARMeilleure/Decoders/IOpCode32AluImm.cs
new file mode 100644
index 0000000..b899901
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluImm.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluImm : IOpCode32Alu
+ {
+ int Immediate { get; }
+
+ bool IsRotated { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluImm16.cs b/src/ARMeilleure/Decoders/IOpCode32AluImm16.cs
new file mode 100644
index 0000000..dd42a70
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluImm16.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluImm16 : IOpCode32Alu
+ {
+ int Immediate { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluMla.cs b/src/ARMeilleure/Decoders/IOpCode32AluMla.cs
new file mode 100644
index 0000000..79b1642
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluMla.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluMla : IOpCode32AluReg
+ {
+ int Ra { get; }
+
+ bool NHigh { get; }
+ bool MHigh { get; }
+ bool R { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluReg.cs b/src/ARMeilleure/Decoders/IOpCode32AluReg.cs
new file mode 100644
index 0000000..1a35e66
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluReg.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluReg : IOpCode32Alu
+ {
+ int Rm { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluRsImm.cs b/src/ARMeilleure/Decoders/IOpCode32AluRsImm.cs
new file mode 100644
index 0000000..37a2c10
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluRsImm.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluRsImm : IOpCode32Alu
+ {
+ int Rm { get; }
+ int Immediate { get; }
+
+ ShiftType ShiftType { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluRsReg.cs b/src/ARMeilleure/Decoders/IOpCode32AluRsReg.cs
new file mode 100644
index 0000000..ed9859f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluRsReg.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluRsReg : IOpCode32Alu
+ {
+ int Rm { get; }
+ int Rs { get; }
+
+ ShiftType ShiftType { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluUmull.cs b/src/ARMeilleure/Decoders/IOpCode32AluUmull.cs
new file mode 100644
index 0000000..79d2bb9
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluUmull.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluUmull : IOpCode32, IOpCode32HasSetFlags
+ {
+ int RdLo { get; }
+ int RdHi { get; }
+ int Rn { get; }
+ int Rm { get; }
+
+ bool NHigh { get; }
+ bool MHigh { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32AluUx.cs b/src/ARMeilleure/Decoders/IOpCode32AluUx.cs
new file mode 100644
index 0000000..d390f6b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32AluUx.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32AluUx : IOpCode32AluReg
+ {
+ int RotateBits { get; }
+ bool Add { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32BImm.cs b/src/ARMeilleure/Decoders/IOpCode32BImm.cs
new file mode 100644
index 0000000..8d22d5c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32BImm.cs
@@ -0,0 +1,4 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32BImm : IOpCode32, IOpCodeBImm { }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32BReg.cs b/src/ARMeilleure/Decoders/IOpCode32BReg.cs
new file mode 100644
index 0000000..9badc98
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32BReg.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32BReg : IOpCode32
+ {
+ int Rm { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32Exception.cs b/src/ARMeilleure/Decoders/IOpCode32Exception.cs
new file mode 100644
index 0000000..c38af90
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32Exception.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Exception
+ {
+ int Id { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32HasSetFlags.cs b/src/ARMeilleure/Decoders/IOpCode32HasSetFlags.cs
new file mode 100644
index 0000000..fd9337d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32HasSetFlags.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32HasSetFlags
+ {
+ bool? SetFlags { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32Mem.cs b/src/ARMeilleure/Decoders/IOpCode32Mem.cs
new file mode 100644
index 0000000..a34bc0e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32Mem.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Mem : IOpCode32
+ {
+ int Rt { get; }
+ int Rt2 => Rt | 1;
+ int Rn { get; }
+
+ bool WBack { get; }
+ bool IsLoad { get; }
+ bool Index { get; }
+ bool Add { get; }
+
+ int Immediate { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32MemEx.cs b/src/ARMeilleure/Decoders/IOpCode32MemEx.cs
new file mode 100644
index 0000000..5f6b932
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32MemEx.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32MemEx : IOpCode32Mem
+ {
+ int Rd { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32MemMult.cs b/src/ARMeilleure/Decoders/IOpCode32MemMult.cs
new file mode 100644
index 0000000..0c5e48f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32MemMult.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32MemMult : IOpCode32
+ {
+ int Rn { get; }
+
+ int RegisterMask { get; }
+
+ int PostOffset { get; }
+
+ bool IsLoad { get; }
+
+ int Offset { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32MemReg.cs b/src/ARMeilleure/Decoders/IOpCode32MemReg.cs
new file mode 100644
index 0000000..6a63f7f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32MemReg.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32MemReg : IOpCode32Mem
+ {
+ int Rm { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32MemRsImm.cs b/src/ARMeilleure/Decoders/IOpCode32MemRsImm.cs
new file mode 100644
index 0000000..3407e98
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32MemRsImm.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32MemRsImm : IOpCode32Mem
+ {
+ int Rm { get; }
+ ShiftType ShiftType { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32Simd.cs b/src/ARMeilleure/Decoders/IOpCode32Simd.cs
new file mode 100644
index 0000000..0dccd26
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32Simd.cs
@@ -0,0 +1,4 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Simd : IOpCode32, IOpCodeSimd { }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCode32SimdImm.cs b/src/ARMeilleure/Decoders/IOpCode32SimdImm.cs
new file mode 100644
index 0000000..a8e6460
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCode32SimdImm.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32SimdImm : IOpCode32Simd
+ {
+ int Vd { get; }
+ long Immediate { get; }
+ int Elems { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCodeAlu.cs b/src/ARMeilleure/Decoders/IOpCodeAlu.cs
new file mode 100644
index 0000000..059769b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeAlu.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAlu : IOpCode
+ {
+ int Rd { get; }
+ int Rn { get; }
+
+ DataOp DataOp { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCodeAluImm.cs b/src/ARMeilleure/Decoders/IOpCodeAluImm.cs
new file mode 100644
index 0000000..40a69cc
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeAluImm.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAluImm : IOpCodeAlu
+ {
+ long Immediate { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCodeAluRs.cs b/src/ARMeilleure/Decoders/IOpCodeAluRs.cs
new file mode 100644
index 0000000..eec9569
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeAluRs.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAluRs : IOpCodeAlu
+ {
+ int Shift { get; }
+ int Rm { get; }
+
+ ShiftType ShiftType { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCodeAluRx.cs b/src/ARMeilleure/Decoders/IOpCodeAluRx.cs
new file mode 100644
index 0000000..e5a8559
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeAluRx.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAluRx : IOpCodeAlu
+ {
+ int Shift { get; }
+ int Rm { get; }
+
+ IntType IntType { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCodeBImm.cs b/src/ARMeilleure/Decoders/IOpCodeBImm.cs
new file mode 100644
index 0000000..9ce7512
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeBImm.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeBImm : IOpCode
+ {
+ long Immediate { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCodeCond.cs b/src/ARMeilleure/Decoders/IOpCodeCond.cs
new file mode 100644
index 0000000..6604f19
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeCond.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeCond : IOpCode
+ {
+ Condition Cond { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCodeLit.cs b/src/ARMeilleure/Decoders/IOpCodeLit.cs
new file mode 100644
index 0000000..434e4da
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeLit.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeLit : IOpCode
+ {
+ int Rt { get; }
+ long Immediate { get; }
+ int Size { get; }
+ bool Signed { get; }
+ bool Prefetch { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/IOpCodeSimd.cs b/src/ARMeilleure/Decoders/IOpCodeSimd.cs
new file mode 100644
index 0000000..598d9d7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IOpCodeSimd.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeSimd : IOpCode
+ {
+ int Size { get; }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/InstDescriptor.cs b/src/ARMeilleure/Decoders/InstDescriptor.cs
new file mode 100644
index 0000000..c35c754
--- /dev/null
+++ b/src/ARMeilleure/Decoders/InstDescriptor.cs
@@ -0,0 +1,18 @@
+using ARMeilleure.Instructions;
+
+namespace ARMeilleure.Decoders
+{
+ readonly struct InstDescriptor
+ {
+ public static InstDescriptor Undefined => new(InstName.Und, InstEmit.Und);
+
+ public InstName Name { get; }
+ public InstEmitter Emitter { get; }
+
+ public InstDescriptor(InstName name, InstEmitter emitter)
+ {
+ Name = name;
+ Emitter = emitter;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/InstEmitter.cs b/src/ARMeilleure/Decoders/InstEmitter.cs
new file mode 100644
index 0000000..43bfcdc
--- /dev/null
+++ b/src/ARMeilleure/Decoders/InstEmitter.cs
@@ -0,0 +1,6 @@
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.Decoders
+{
+ delegate void InstEmitter(ArmEmitterContext context);
+}
diff --git a/src/ARMeilleure/Decoders/IntType.cs b/src/ARMeilleure/Decoders/IntType.cs
new file mode 100644
index 0000000..937a569
--- /dev/null
+++ b/src/ARMeilleure/Decoders/IntType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ enum IntType
+ {
+ UInt8 = 0,
+ UInt16 = 1,
+ UInt32 = 2,
+ UInt64 = 3,
+ Int8 = 4,
+ Int16 = 5,
+ Int32 = 6,
+ Int64 = 7,
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode.cs b/src/ARMeilleure/Decoders/OpCode.cs
new file mode 100644
index 0000000..c812330
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode.cs
@@ -0,0 +1,48 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode : IOpCode
+ {
+ public ulong Address { get; }
+ public int RawOpCode { get; }
+
+ public int OpCodeSizeInBytes { get; protected set; } = 4;
+
+ public InstDescriptor Instruction { get; protected set; }
+
+ public RegisterSize RegisterSize { get; protected set; }
+
+ public static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new(inst, address, opCode);
+
+ public OpCode(InstDescriptor inst, ulong address, int opCode)
+ {
+ Instruction = inst;
+ Address = address;
+ RawOpCode = opCode;
+
+ RegisterSize = RegisterSize.Int64;
+ }
+
+ public int GetPairsCount() => GetBitsCount() / 16;
+ public int GetBytesCount() => GetBitsCount() / 8;
+
+ public int GetBitsCount()
+ {
+ return RegisterSize switch
+ {
+ RegisterSize.Int32 => 32,
+ RegisterSize.Int64 => 64,
+ RegisterSize.Simd64 => 64,
+ RegisterSize.Simd128 => 128,
+ _ => throw new InvalidOperationException(),
+ };
+ }
+
+ public OperandType GetOperandType()
+ {
+ return RegisterSize == RegisterSize.Int32 ? OperandType.I32 : OperandType.I64;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32.cs b/src/ARMeilleure/Decoders/OpCode32.cs
new file mode 100644
index 0000000..a2be01e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32.cs
@@ -0,0 +1,34 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32 : OpCode
+ {
+ public Condition Cond { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32(inst, address, opCode);
+
+ public OpCode32(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ RegisterSize = RegisterSize.Int32;
+
+ Cond = (Condition)((uint)opCode >> 28);
+ }
+
+ public bool IsThumb { get; protected init; } = false;
+
+ public uint GetPc()
+ {
+ // Due to backwards compatibility and the legacy behavior of the ARMv4 CPU pipeline,
+ // the PC actually points 2 instructions ahead.
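+ // e.g. an ARM-mode instruction at 0x1000 reads PC as 0x1008; a Thumb one reads 0x1004.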
+ if (IsThumb)
+ {
+ // PC is ahead by 4 in Thumb mode, whether the current instruction
+ // is 16 or 32 bit.
+ return (uint)Address + 4u;
+ }
+ else
+ {
+ return (uint)Address + 8u;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32Alu.cs b/src/ARMeilleure/Decoders/OpCode32Alu.cs
new file mode 100644
index 0000000..8634f5c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Alu.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Alu : OpCode32, IOpCode32Alu
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Alu(inst, address, opCode);
+
+ public OpCode32Alu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ SetFlags = ((opCode >> 20) & 1) != 0;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluBf.cs b/src/ARMeilleure/Decoders/OpCode32AluBf.cs
new file mode 100644
index 0000000..c347844
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluBf.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluBf : OpCode32, IOpCode32AluBf
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public int Msb { get; }
+ public int Lsb { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluBf(inst, address, opCode);
+
+ public OpCode32AluBf(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 0) & 0xf;
+
+ Msb = (opCode >> 16) & 0x1f;
+ Lsb = (opCode >> 7) & 0x1f;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluImm.cs b/src/ARMeilleure/Decoders/OpCode32AluImm.cs
new file mode 100644
index 0000000..c8b05e6
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluImm.cs
@@ -0,0 +1,23 @@
+using ARMeilleure.Common;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluImm : OpCode32Alu, IOpCode32AluImm
+ {
+ public int Immediate { get; }
+
+ public bool IsRotated { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluImm(inst, address, opCode);
+
+ public OpCode32AluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int value = (opCode >> 0) & 0xff;
+ int shift = (opCode >> 8) & 0xf;
+
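+ // e.g. imm8 = 0x01 with rotate field 0xF yields RotateRight(0x01, 30, 32) = 0x4.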
+ Immediate = BitUtils.RotateRight(value, shift * 2, 32);
+
+ IsRotated = shift != 0;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluImm16.cs b/src/ARMeilleure/Decoders/OpCode32AluImm16.cs
new file mode 100644
index 0000000..2af35bd
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluImm16.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluImm16 : OpCode32Alu, IOpCode32AluImm16
+ {
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluImm16(inst, address, opCode);
+
+ public OpCode32AluImm16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int imm12 = opCode & 0xfff;
+ int imm4 = (opCode >> 16) & 0xf;
+
+ Immediate = (imm4 << 12) | imm12;
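+ // e.g. imm4 = 0x1 and imm12 = 0x234 decode to Immediate = 0x1234 (the MOVW/MOVT split-immediate form).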
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluMla.cs b/src/ARMeilleure/Decoders/OpCode32AluMla.cs
new file mode 100644
index 0000000..bc5d239
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluMla.cs
@@ -0,0 +1,30 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluMla : OpCode32, IOpCode32AluMla
+ {
+ public int Rn { get; }
+ public int Rm { get; }
+ public int Ra { get; }
+ public int Rd { get; }
+
+ public bool NHigh { get; }
+ public bool MHigh { get; }
+ public bool R { get; }
+ public bool? SetFlags { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluMla(inst, address, opCode);
+
+ public OpCode32AluMla(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 0) & 0xf;
+ Rm = (opCode >> 8) & 0xf;
+ Ra = (opCode >> 12) & 0xf;
+ Rd = (opCode >> 16) & 0xf;
+ R = (opCode & (1 << 5)) != 0;
+
+ NHigh = ((opCode >> 5) & 0x1) == 1;
+ MHigh = ((opCode >> 6) & 0x1) == 1;
+ SetFlags = ((opCode >> 20) & 1) != 0;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluReg.cs b/src/ARMeilleure/Decoders/OpCode32AluReg.cs
new file mode 100644
index 0000000..9ef7571
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluReg.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluReg : OpCode32Alu, IOpCode32AluReg
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluReg(inst, address, opCode);
+
+ public OpCode32AluReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluRsImm.cs b/src/ARMeilleure/Decoders/OpCode32AluRsImm.cs
new file mode 100644
index 0000000..4b2c589
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluRsImm.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluRsImm : OpCode32Alu, IOpCode32AluRsImm
+ {
+ public int Rm { get; }
+ public int Immediate { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluRsImm(inst, address, opCode);
+
+ public OpCode32AluRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Immediate = (opCode >> 7) & 0x1f;
+
+ ShiftType = (ShiftType)((opCode >> 5) & 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluRsReg.cs b/src/ARMeilleure/Decoders/OpCode32AluRsReg.cs
new file mode 100644
index 0000000..6379b3b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluRsReg.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluRsReg : OpCode32Alu, IOpCode32AluRsReg
+ {
+ public int Rm { get; }
+ public int Rs { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluRsReg(inst, address, opCode);
+
+ public OpCode32AluRsReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Rs = (opCode >> 8) & 0xf;
+
+ ShiftType = (ShiftType)((opCode >> 5) & 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluUmull.cs b/src/ARMeilleure/Decoders/OpCode32AluUmull.cs
new file mode 100644
index 0000000..44b7ea1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluUmull.cs
@@ -0,0 +1,30 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluUmull : OpCode32, IOpCode32AluUmull
+ {
+ public int RdLo { get; }
+ public int RdHi { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+
+ public bool NHigh { get; }
+ public bool MHigh { get; }
+
+ public bool? SetFlags { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluUmull(inst, address, opCode);
+
+ public OpCode32AluUmull(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ RdLo = (opCode >> 12) & 0xf;
+ RdHi = (opCode >> 16) & 0xf;
+ Rm = (opCode >> 8) & 0xf;
+ Rn = (opCode >> 0) & 0xf;
+
+ NHigh = ((opCode >> 5) & 0x1) == 1;
+ MHigh = ((opCode >> 6) & 0x1) == 1;
+
+ SetFlags = ((opCode >> 20) & 0x1) != 0;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32AluUx.cs b/src/ARMeilleure/Decoders/OpCode32AluUx.cs
new file mode 100644
index 0000000..68da302
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32AluUx.cs
@@ -0,0 +1,18 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32AluUx : OpCode32AluReg, IOpCode32AluUx
+ {
+ public int Rotate { get; }
+ public int RotateBits => Rotate * 8;
+ public bool Add => Rn != RegisterAlias.Aarch32Pc;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluUx(inst, address, opCode);
+
+ public OpCode32AluUx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rotate = (opCode >> 10) & 0x3;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32BImm.cs b/src/ARMeilleure/Decoders/OpCode32BImm.cs
new file mode 100644
index 0000000..e7f5d6d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32BImm.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32BImm : OpCode32, IOpCode32BImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32BImm(inst, address, opCode);
+
+ public OpCode32BImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ uint pc = GetPc();
+
+ // When the condition is never, the instruction is BLX to Thumb mode.
+ if (Cond != Condition.Nv)
+ {
+ pc &= ~3u;
+ }
+
+ Immediate = pc + DecoderHelper.DecodeImm24_2(opCode);
+
+ if (Cond == Condition.Nv)
+ {
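+ // For BLX <label>, bit 24 (H) supplies bit 1 of the target, allowing a halfword-aligned Thumb destination.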
+ long H = (opCode >> 23) & 2;
+
+ Immediate |= H;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32BReg.cs b/src/ARMeilleure/Decoders/OpCode32BReg.cs
new file mode 100644
index 0000000..8939c0d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32BReg.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32BReg : OpCode32, IOpCode32BReg
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32BReg(inst, address, opCode);
+
+ public OpCode32BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = opCode & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32Exception.cs b/src/ARMeilleure/Decoders/OpCode32Exception.cs
new file mode 100644
index 0000000..51a535e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Exception.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Exception : OpCode32, IOpCode32Exception
+ {
+ public int Id { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Exception(inst, address, opCode);
+
+ public OpCode32Exception(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Id = opCode & 0xFFFFFF;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32Mem.cs b/src/ARMeilleure/Decoders/OpCode32Mem.cs
new file mode 100644
index 0000000..8a24219
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Mem.cs
@@ -0,0 +1,39 @@
+using ARMeilleure.Instructions;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Mem : OpCode32, IOpCode32Mem
+ {
+ public int Rt { get; protected set; }
+ public int Rn { get; }
+
+ public int Immediate { get; protected set; }
+
+ public bool Index { get; }
+ public bool Add { get; }
+ public bool WBack { get; }
+ public bool Unprivileged { get; }
+
+ public bool IsLoad { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Mem(inst, address, opCode);
+
+ public OpCode32Mem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ bool isLoad = (opCode & (1 << 20)) != 0;
+ bool w = (opCode & (1 << 21)) != 0;
+ bool u = (opCode & (1 << 23)) != 0;
+ bool p = (opCode & (1 << 24)) != 0;
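+ // p == 0 is post-indexed addressing (always writes back); p == 0 with w == 1 selects the
+ // unprivileged LDRT/STRT-style forms.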
+
+ Index = p;
+ Add = u;
+ WBack = !p || w;
+ Unprivileged = !p && w;
+
+ IsLoad = isLoad || inst.Name == InstName.Ldrd;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MemImm.cs b/src/ARMeilleure/Decoders/OpCode32MemImm.cs
new file mode 100644
index 0000000..fa10e04
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemImm.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemImm : OpCode32Mem
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemImm(inst, address, opCode);
+
+ public OpCode32MemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Immediate = opCode & 0xfff;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MemImm8.cs b/src/ARMeilleure/Decoders/OpCode32MemImm8.cs
new file mode 100644
index 0000000..248ee8e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemImm8.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemImm8 : OpCode32Mem
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemImm8(inst, address, opCode);
+
+ public OpCode32MemImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int imm4L = (opCode >> 0) & 0xf;
+ int imm4H = (opCode >> 8) & 0xf;
+
+ Immediate = imm4L | (imm4H << 4);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MemLdEx.cs b/src/ARMeilleure/Decoders/OpCode32MemLdEx.cs
new file mode 100644
index 0000000..0f0b37e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemLdEx.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemLdEx : OpCode32Mem, IOpCode32MemEx
+ {
+ public int Rd { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemLdEx(inst, address, opCode);
+
+ public OpCode32MemLdEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = opCode & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MemMult.cs b/src/ARMeilleure/Decoders/OpCode32MemMult.cs
new file mode 100644
index 0000000..6e39e34
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemMult.cs
@@ -0,0 +1,52 @@
+using System.Numerics;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemMult : OpCode32, IOpCode32MemMult
+ {
+ public int Rn { get; }
+
+ public int RegisterMask { get; }
+ public int Offset { get; }
+ public int PostOffset { get; }
+
+ public bool IsLoad { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemMult(inst, address, opCode);
+
+ public OpCode32MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 16) & 0xf;
+
+ bool isLoad = (opCode & (1 << 20)) != 0;
+ bool w = (opCode & (1 << 21)) != 0;
+ bool u = (opCode & (1 << 23)) != 0;
+ bool p = (opCode & (1 << 24)) != 0;
+
+ RegisterMask = opCode & 0xffff;
+
+ int regsSize = BitOperations.PopCount((uint)RegisterMask) * 4;
+
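+ // The adjustments below reproduce the four LDM/STM addressing modes:
+ // IA (p=0, u=1): 0; IB (p=1, u=1): +4; DA (p=0, u=0): -regsSize + 4; DB (p=1, u=0): -regsSize.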
+ if (!u)
+ {
+ Offset -= regsSize;
+ }
+
+ if (u == p)
+ {
+ Offset += 4;
+ }
+
+ if (w)
+ {
+ PostOffset = u ? regsSize : -regsSize;
+ }
+ else
+ {
+ PostOffset = 0;
+ }
+
+ IsLoad = isLoad;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MemReg.cs b/src/ARMeilleure/Decoders/OpCode32MemReg.cs
new file mode 100644
index 0000000..d8f1c29
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemReg.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemReg : OpCode32Mem, IOpCode32MemReg
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemReg(inst, address, opCode);
+
+ public OpCode32MemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MemRsImm.cs b/src/ARMeilleure/Decoders/OpCode32MemRsImm.cs
new file mode 100644
index 0000000..b0e5aa4
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemRsImm.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemRsImm : OpCode32Mem, IOpCode32MemRsImm
+ {
+ public int Rm { get; }
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemRsImm(inst, address, opCode);
+
+ public OpCode32MemRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Immediate = (opCode >> 7) & 0x1f;
+
+ ShiftType = (ShiftType)((opCode >> 5) & 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MemStEx.cs b/src/ARMeilleure/Decoders/OpCode32MemStEx.cs
new file mode 100644
index 0000000..180a9b5
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MemStEx.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MemStEx : OpCode32Mem, IOpCode32MemEx
+ {
+ public int Rd { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemStEx(inst, address, opCode);
+
+ public OpCode32MemStEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 12) & 0xf;
+ Rt = (opCode >> 0) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32Mrs.cs b/src/ARMeilleure/Decoders/OpCode32Mrs.cs
new file mode 100644
index 0000000..b681b54
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Mrs.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Mrs : OpCode32
+ {
+ public bool R { get; }
+ public int Rd { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Mrs(inst, address, opCode);
+
+ public OpCode32Mrs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ R = ((opCode >> 22) & 1) != 0;
+ Rd = (opCode >> 12) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32MsrReg.cs b/src/ARMeilleure/Decoders/OpCode32MsrReg.cs
new file mode 100644
index 0000000..dcd06aa
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32MsrReg.cs
@@ -0,0 +1,29 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32MsrReg : OpCode32
+ {
+ public bool R { get; }
+ public int Mask { get; }
+ public int Rd { get; }
+ public bool Banked { get; }
+ public int Rn { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MsrReg(inst, address, opCode);
+
+ public OpCode32MsrReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ R = ((opCode >> 22) & 1) != 0;
+ Mask = (opCode >> 16) & 0xf;
+ Rd = (opCode >> 12) & 0xf;
+ Banked = ((opCode >> 9) & 1) != 0;
+ Rn = (opCode >> 0) & 0xf;
+
+ if (Rn == RegisterAlias.Aarch32Pc || Mask == 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32Sat.cs b/src/ARMeilleure/Decoders/OpCode32Sat.cs
new file mode 100644
index 0000000..35c5cf4
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Sat.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Sat : OpCode32
+ {
+ public int Rn { get; }
+ public int Imm5 { get; }
+ public int Rd { get; }
+ public int SatImm { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Sat(inst, address, opCode);
+
+ public OpCode32Sat(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 0) & 0xf;
+ Imm5 = (opCode >> 7) & 0x1f;
+ Rd = (opCode >> 12) & 0xf;
+ SatImm = (opCode >> 16) & 0x1f;
+
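+ // Only LSL (0) and ASR (2) are valid for saturating shifts; bit 6 selects between them, hence the mask with 2.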
+ ShiftType = (ShiftType)((opCode >> 5) & 2);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32Sat16.cs b/src/ARMeilleure/Decoders/OpCode32Sat16.cs
new file mode 100644
index 0000000..01f4d3b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Sat16.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Sat16 : OpCode32
+ {
+ public int Rn { get; }
+ public int Rd { get; }
+ public int SatImm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Sat16(inst, address, opCode);
+
+ public OpCode32Sat16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 0) & 0xf;
+ Rd = (opCode >> 12) & 0xf;
+ SatImm = (opCode >> 16) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32Simd.cs b/src/ARMeilleure/Decoders/OpCode32Simd.cs
new file mode 100644
index 0000000..1e69b23
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Simd.cs
@@ -0,0 +1,33 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Simd : OpCode32SimdBase
+ {
+ public int Opc { get; protected set; }
+ public bool Q { get; protected set; }
+ public bool F { get; protected set; }
+ public bool U { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Simd(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32Simd(inst, address, opCode, true);
+
+ public OpCode32Simd(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Size = (opCode >> 20) & 0x3;
+ Q = ((opCode >> 6) & 0x1) != 0;
+ F = ((opCode >> 10) & 0x1) != 0;
+ U = ((opCode >> (isThumb ? 28 : 24)) & 0x1) != 0;
+ Opc = (opCode >> 7) & 0x3;
+
+ RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+
+ // Subclasses do their own Vx handling before validation, so only check here for the exact base type.
+ if (GetType() == typeof(OpCode32Simd) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdBase.cs b/src/ARMeilleure/Decoders/OpCode32SimdBase.cs
new file mode 100644
index 0000000..d0634a0
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdBase.cs
@@ -0,0 +1,49 @@
+using System;
+
+namespace ARMeilleure.Decoders
+{
+ abstract class OpCode32SimdBase : OpCode32, IOpCode32Simd
+ {
+ public int Vd { get; protected set; }
+ public int Vm { get; protected set; }
+ public int Size { get; protected set; }
+
+ // Helpers to index doublewords within quadwords. Essentially, looping over the vector starts at quadword Qx and index Fx or Ix within it,
+ // depending on instruction type.
+ //
+ // Qx: The quadword register that the target vector is contained in.
+ // Ix: The starting index of the target vector within the quadword, with size treated as integer.
+ // Fx: The starting index of the target vector within the quadword, with size treated as floating point (16 or 32).
+ public int Qd => GetQuadwordIndex(Vd);
+ public int Id => GetQuadwordSubindex(Vd) << (3 - Size);
+ public int Fd => GetQuadwordSubindex(Vd) << (1 - (Size & 1)); // When the top bit is truncated, 1 is fp16, which is an optional extension in ARMv8.2. We always assume 64.
+
+ public int Qm => GetQuadwordIndex(Vm);
+ public int Im => GetQuadwordSubindex(Vm) << (3 - Size);
+ public int Fm => GetQuadwordSubindex(Vm) << (1 - (Size & 1));
+
+ protected int GetQuadwordIndex(int index)
+ {
+ return RegisterSize switch
+ {
+ RegisterSize.Simd128 or RegisterSize.Simd64 => index >> 1,
+ _ => throw new InvalidOperationException(),
+ };
+ }
+
+ protected int GetQuadwordSubindex(int index)
+ {
+ return RegisterSize switch
+ {
+ RegisterSize.Simd128 => 0,
+ RegisterSize.Simd64 => index & 1,
+ _ => throw new InvalidOperationException(),
+ };
+ }
+
+ protected OpCode32SimdBase(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdBinary.cs b/src/ARMeilleure/Decoders/OpCode32SimdBinary.cs
new file mode 100644
index 0000000..c0c8277
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdBinary.cs
@@ -0,0 +1,21 @@
+namespace ARMeilleure.Decoders
+{
+ /// <summary>
+ /// A special alias that always runs in 64 bit int, to speed up binary ops a little.
+ /// </summary>
+ class OpCode32SimdBinary : OpCode32SimdReg
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdBinary(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdBinary(inst, address, opCode, true);
+
+ public OpCode32SimdBinary(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Size = 3;
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs b/src/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs
new file mode 100644
index 0000000..d8bc109
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdCmpZ : OpCode32Simd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCmpZ(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCmpZ(inst, address, opCode, true);
+
+ public OpCode32SimdCmpZ(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Size = (opCode >> 18) & 0x3;
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdCvtFFixed.cs b/src/ARMeilleure/Decoders/OpCode32SimdCvtFFixed.cs
new file mode 100644
index 0000000..200df73
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdCvtFFixed.cs
@@ -0,0 +1,23 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdCvtFFixed : OpCode32Simd
+ {
+ public int Fbits { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtFFixed(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtFFixed(inst, address, opCode, true);
+
+ public OpCode32SimdCvtFFixed(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Opc = (opCode >> 8) & 0x1;
+
+ Size = Opc == 1 ? 0 : 2;
+ Fbits = 64 - ((opCode >> 16) & 0x3f);
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs b/src/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs
new file mode 100644
index 0000000..ee8f94a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdCvtFI : OpCode32SimdS
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtFI(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtFI(inst, address, opCode, true);
+
+ public OpCode32SimdCvtFI(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Opc = (opCode >> 7) & 0x1;
+
+ bool toInteger = (Opc2 & 0b100) != 0;
+
+ if (toInteger)
+ {
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs b/src/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs
new file mode 100644
index 0000000..d3beb4b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs
@@ -0,0 +1,44 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdCvtTB : OpCode32, IOpCode32Simd
+ {
+ public int Vd { get; }
+ public int Vm { get; }
+ public bool Op { get; } // Convert to Half / Convert from Half
+ public bool T { get; } // Top / Bottom
+ public int Size { get; } // Double / Single
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtTB(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtTB(inst, address, opCode, true);
+
+ public OpCode32SimdCvtTB(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Op = ((opCode >> 16) & 0x1) != 0;
+ T = ((opCode >> 7) & 0x1) != 0;
+ Size = ((opCode >> 8) & 0x1);
+
+ RegisterSize = Size == 1 ? RegisterSize.Int64 : RegisterSize.Int32;
+
+ if (Size == 1)
+ {
+ if (Op)
+ {
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ }
+ }
+ else
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdDupElem.cs b/src/ARMeilleure/Decoders/OpCode32SimdDupElem.cs
new file mode 100644
index 0000000..b6cdff0
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdDupElem.cs
@@ -0,0 +1,43 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdDupElem : OpCode32Simd
+ {
+ public int Index { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupElem(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupElem(inst, address, opCode, true);
+
+ public OpCode32SimdDupElem(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ var opc = (opCode >> 16) & 0xf;
+
+ if ((opc & 0b1) == 1)
+ {
+ Size = 0;
+ Index = (opc >> 1) & 0x7;
+ }
+ else if ((opc & 0b11) == 0b10)
+ {
+ Size = 1;
+ Index = (opc >> 2) & 0x3;
+ }
+ else if ((opc & 0b111) == 0b100)
+ {
+ Size = 2;
+ Index = (opc >> 3) & 0x1;
+ }
+ else
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdDupGP.cs b/src/ARMeilleure/Decoders/OpCode32SimdDupGP.cs
new file mode 100644
index 0000000..57adea5
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdDupGP.cs
@@ -0,0 +1,36 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdDupGP : OpCode32, IOpCode32Simd
+ {
+ public int Size { get; }
+ public int Vd { get; }
+ public int Rt { get; }
+ public bool Q { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupGP(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupGP(inst, address, opCode, true);
+
+ public OpCode32SimdDupGP(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Size = 2 - (((opCode >> 21) & 0x2) | ((opCode >> 5) & 0x1)); // 2 - B:E: 2 selects 32-bit, 1 selects 16, 0 selects 8; B:E == 3 gives -1 (undefined).
+ if (Size == -1)
+ {
+ Instruction = InstDescriptor.Undefined;
+ return;
+ }
+ Q = ((opCode >> 21) & 0x1) != 0;
+
+ RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+
+ Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf);
+ Rt = ((opCode >> 12) & 0xf);
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdExt.cs b/src/ARMeilleure/Decoders/OpCode32SimdExt.cs
new file mode 100644
index 0000000..4fe9f25
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdExt.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdExt : OpCode32SimdReg
+ {
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdExt(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdExt(inst, address, opCode, true);
+
+ public OpCode32SimdExt(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Immediate = (opCode >> 8) & 0xf;
+ Size = 0;
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn) || (!Q && Immediate > 7))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdImm.cs b/src/ARMeilleure/Decoders/OpCode32SimdImm.cs
new file mode 100644
index 0000000..9e931e7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdImm.cs
@@ -0,0 +1,38 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdImm : OpCode32SimdBase, IOpCode32SimdImm
+ {
+ public bool Q { get; }
+ public long Immediate { get; }
+ public int Elems => GetBytesCount() >> Size;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm(inst, address, opCode, true);
+
+ public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Vd = (opCode >> 12) & 0xf;
+ Vd |= (opCode >> 18) & 0x10;
+
+ Q = ((opCode >> 6) & 0x1) > 0;
+
+ int cMode = (opCode >> 8) & 0xf;
+ int op = (opCode >> 5) & 0x1;
+
+ long imm;
+
+ imm = ((uint)opCode >> 0) & 0xf;
+ imm |= ((uint)opCode >> 12) & 0x70;
+ imm |= ((uint)opCode >> (isThumb ? 21 : 17)) & 0x80;
+
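+ // imm is the abcdefgh byte of the AdvSIMD modified-immediate encoding; cMode and op select how it expands.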
+ (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm);
+
+ RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdImm44.cs b/src/ARMeilleure/Decoders/OpCode32SimdImm44.cs
new file mode 100644
index 0000000..55df1ba
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdImm44.cs
@@ -0,0 +1,41 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdImm44 : OpCode32, IOpCode32SimdImm
+ {
+ public int Vd { get; }
+ public long Immediate { get; }
+ public int Size { get; }
+ public int Elems { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm44(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm44(inst, address, opCode, true);
+
+ public OpCode32SimdImm44(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Size = (opCode >> 8) & 0x3;
+
+ bool single = Size != 3;
+
+ if (single)
+ {
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ }
+
+ long imm;
+
+ imm = ((uint)opCode >> 0) & 0xf;
+ imm |= ((uint)opCode >> 12) & 0xf0;
+
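+ // imm8 expands to a floating-point constant following the VFPExpandImm pattern (sign, biased exponent, fraction).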
+ Immediate = (Size == 3) ? (long)DecoderHelper.Imm8ToFP64Table[(int)imm] : DecoderHelper.Imm8ToFP32Table[(int)imm];
+
+ RegisterSize = (!single) ? RegisterSize.Int64 : RegisterSize.Int32;
+ Elems = 1;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdLong.cs
new file mode 100644
index 0000000..5c068de
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdLong.cs
@@ -0,0 +1,36 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdLong : OpCode32SimdBase
+ {
+ public bool U { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdLong(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdLong(inst, address, opCode, true);
+
+ public OpCode32SimdLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ int imm3h = (opCode >> 19) & 0x7;
+
+ // The value must be a power of 2, otherwise it is the encoding of another instruction.
+ switch (imm3h)
+ {
+ case 1:
+ Size = 0;
+ break;
+ case 2:
+ Size = 1;
+ break;
+ case 4:
+ Size = 2;
+ break;
+ }
+
+ U = ((opCode >> (isThumb ? 28 : 24)) & 0x1) != 0;
+
+ RegisterSize = RegisterSize.Simd64;
+
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemImm.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemImm.cs
new file mode 100644
index 0000000..86870df
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMemImm.cs
@@ -0,0 +1,40 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMemImm : OpCode32, IOpCode32Simd
+ {
+ public int Vd { get; }
+ public int Rn { get; }
+ public int Size { get; }
+ public bool Add { get; }
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemImm(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemImm(inst, address, opCode, true);
+
+ public OpCode32SimdMemImm(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Immediate = opCode & 0xff;
+
+ Rn = (opCode >> 16) & 0xf;
+ Size = (opCode >> 8) & 0x3;
+
+ Immediate <<= (Size == 1) ? 1 : 2;
+
+ bool u = (opCode & (1 << 23)) != 0;
+ Add = u;
+
+ bool single = Size != 3;
+
+ if (single)
+ {
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemMult.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemMult.cs
new file mode 100644
index 0000000..c3b8670
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMemMult.cs
@@ -0,0 +1,76 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMemMult : OpCode32
+ {
+ public int Rn { get; }
+ public int Vd { get; }
+
+ public int RegisterRange { get; }
+ public int Offset { get; }
+ public int PostOffset { get; }
+ public bool IsLoad { get; }
+ public bool DoubleWidth { get; }
+ public bool Add { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemMult(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemMult(inst, address, opCode, true);
+
+ public OpCode32SimdMemMult(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Rn = (opCode >> 16) & 0xf;
+
+ bool isLoad = (opCode & (1 << 20)) != 0;
+ bool w = (opCode & (1 << 21)) != 0;
+ bool u = (opCode & (1 << 23)) != 0;
+ bool p = (opCode & (1 << 24)) != 0;
+
+ if (p == u && w)
+ {
+ Instruction = InstDescriptor.Undefined;
+ return;
+ }
+
+ DoubleWidth = (opCode & (1 << 8)) != 0;
+
+ if (!DoubleWidth)
+ {
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ }
+
+ Add = u;
+
+ RegisterRange = opCode & 0xff;
+
+ int regsSize = RegisterRange * 4; // Double mode is still measured in single-register units.
+
+ if (!u)
+ {
+ Offset -= regsSize;
+ }
+
+ if (w)
+ {
+ PostOffset = u ? regsSize : -regsSize;
+ }
+ else
+ {
+ PostOffset = 0;
+ }
+
+ IsLoad = isLoad;
+
+ int regs = DoubleWidth ? RegisterRange / 2 : RegisterRange;
+
+ if (RegisterRange == 0 || RegisterRange > 32 || Vd + regs > 32)
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemPair.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemPair.cs
new file mode 100644
index 0000000..6a18211
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMemPair.cs
@@ -0,0 +1,50 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMemPair : OpCode32, IOpCode32Simd
+ {
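+ // Number of registers transferred for each VLDn/VSTn 'type' encoding (opcode bits 11:8).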
+ private static readonly int[] _regsMap =
+ {
+ 1, 1, 4, 2,
+ 1, 1, 3, 1,
+ 1, 1, 2, 1,
+ 1, 1, 1, 1,
+ };
+
+ public int Vd { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+ public int Align { get; }
+ public bool WBack { get; }
+ public bool RegisterIndex { get; }
+ public int Size { get; }
+ public int Elems => 8 >> Size;
+ public int Regs { get; }
+ public int Increment { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemPair(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemPair(inst, address, opCode, true);
+
+ public OpCode32SimdMemPair(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Vd = (opCode >> 12) & 0xf;
+ Vd |= (opCode >> 18) & 0x10;
+
+ Size = (opCode >> 6) & 0x3;
+
+ Align = (opCode >> 4) & 0x3;
+ Rm = (opCode >> 0) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ WBack = Rm != RegisterAlias.Aarch32Pc;
+ RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp;
+
+ Regs = _regsMap[(opCode >> 8) & 0xf];
+
+ Increment = ((opCode >> 8) & 0x1) + 1;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs
new file mode 100644
index 0000000..5df4500
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs
@@ -0,0 +1,51 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMemSingle : OpCode32, IOpCode32Simd
+ {
+ public int Vd { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+ public int IndexAlign { get; }
+ public int Index { get; }
+ public bool WBack { get; }
+ public bool RegisterIndex { get; }
+ public int Size { get; }
+ public bool Replicate { get; }
+ public int Increment { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemSingle(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemSingle(inst, address, opCode, true);
+
+ public OpCode32SimdMemSingle(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Vd = (opCode >> 12) & 0xf;
+ Vd |= (opCode >> 18) & 0x10;
+
+ IndexAlign = (opCode >> 4) & 0xf;
+
+ Size = (opCode >> 10) & 0x3;
+ Replicate = Size == 3;
+ if (Replicate)
+ {
+ Size = (opCode >> 6) & 0x3;
+ Increment = ((opCode >> 5) & 1) + 1;
+ Index = 0;
+ }
+ else
+ {
+ Increment = (((IndexAlign >> Size) & 1) == 0) ? 1 : 2;
+ Index = IndexAlign >> (1 + Size);
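+ // index_align keeps the element index in its upper bits; the bit at position Size selects single or double element spacing.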
+ }
+
+ Rm = (opCode >> 0) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ WBack = Rm != RegisterAlias.Aarch32Pc;
+ RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovGp.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovGp.cs
new file mode 100644
index 0000000..35b8cc9
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMovGp.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMovGp : OpCode32, IOpCode32Simd
+ {
+ public int Size => 2;
+
+ public int Vn { get; }
+ public int Rt { get; }
+ public int Op { get; }
+
+ public int Opc1 { get; }
+ public int Opc2 { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGp(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGp(inst, address, opCode, true);
+
+ public OpCode32SimdMovGp(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ // Which one is used is instruction dependent.
+ Op = (opCode >> 20) & 0x1;
+
+ Opc1 = (opCode >> 21) & 0x3;
+ Opc2 = (opCode >> 5) & 0x3;
+
+ Vn = ((opCode >> 7) & 0x1) | ((opCode >> 15) & 0x1e);
+ Rt = (opCode >> 12) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs
new file mode 100644
index 0000000..4399fb3
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs
@@ -0,0 +1,36 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMovGpDouble : OpCode32, IOpCode32Simd
+ {
+ public int Size => 3;
+
+ public int Vm { get; }
+ public int Rt { get; }
+ public int Rt2 { get; }
+ public int Op { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpDouble(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpDouble(inst, address, opCode, true);
+
+ public OpCode32SimdMovGpDouble(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ // Which one is used is instruction dependent.
+ Op = (opCode >> 20) & 0x1;
+
+ Rt = (opCode >> 12) & 0xf;
+ Rt2 = (opCode >> 16) & 0xf;
+
+ bool single = (opCode & (1 << 8)) == 0;
+ if (single)
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ }
+ else
+ {
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs
new file mode 100644
index 0000000..f6fce7d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs
@@ -0,0 +1,51 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMovGpElem : OpCode32, IOpCode32Simd
+ {
+ public int Size { get; }
+
+ public int Vd { get; }
+ public int Rt { get; }
+ public int Op { get; }
+ public bool U { get; }
+
+ public int Index { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpElem(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpElem(inst, address, opCode, true);
+
+ public OpCode32SimdMovGpElem(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Op = (opCode >> 20) & 0x1;
+ U = ((opCode >> 23) & 1) != 0;
+
+ var opc = (((opCode >> 23) & 1) << 4) | (((opCode >> 21) & 0x3) << 2) | ((opCode >> 5) & 0x3);
+
+ if ((opc & 0b01000) == 0b01000)
+ {
+ Size = 0;
+ Index = opc & 0x7;
+ }
+ else if ((opc & 0b01001) == 0b00001)
+ {
+ Size = 1;
+ Index = (opc >> 1) & 0x3;
+ }
+ else if ((opc & 0b11011) == 0)
+ {
+ Size = 2;
+ Index = (opc >> 2) & 0x1;
+ }
+ else
+ {
+ Instruction = InstDescriptor.Undefined;
+ return;
+ }
+
+ Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf);
+ Rt = (opCode >> 12) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovn.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovn.cs
new file mode 100644
index 0000000..576e12c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdMovn.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdMovn : OpCode32Simd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovn(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovn(inst, address, opCode, true);
+
+ public OpCode32SimdMovn(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Size = (opCode >> 18) & 0x3;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdReg.cs b/src/ARMeilleure/Decoders/OpCode32SimdReg.cs
new file mode 100644
index 0000000..eaf17b8
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdReg.cs
@@ -0,0 +1,25 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdReg : OpCode32Simd
+ {
+ public int Vn { get; }
+
+ public int Qn => GetQuadwordIndex(Vn);
+ public int In => GetQuadwordSubindex(Vn) << (3 - Size);
+ public int Fn => GetQuadwordSubindex(Vn) << (1 - (Size & 1));
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdReg(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdReg(inst, address, opCode, true);
+
+ public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf);
+
+ // Subclasses do their own Vx handling before validation, so only check here for the exact base type.
+ if (GetType() == typeof(OpCode32SimdReg) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegElem.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegElem.cs
new file mode 100644
index 0000000..147de44
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRegElem.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRegElem : OpCode32SimdReg
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElem(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElem(inst, address, opCode, true);
+
+ public OpCode32SimdRegElem(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Q = ((opCode >> (isThumb ? 28 : 24)) & 0x1) != 0;
+ F = ((opCode >> 8) & 0x1) != 0;
+ Size = (opCode >> 20) & 0x3;
+
+ RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+
+ if (Size == 1)
+ {
+ Vm = ((opCode >> 3) & 0x1) | ((opCode >> 4) & 0x2) | ((opCode << 2) & 0x1c);
+ }
+ else /* if (Size == 2) */
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ }
+
+ if (GetType() == typeof(OpCode32SimdRegElem) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vn) || Size == 0 || (Size == 1 && F))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs
new file mode 100644
index 0000000..8aea44c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRegElemLong : OpCode32SimdRegElem
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElemLong(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElemLong(inst, address, opCode, true);
+
+ public OpCode32SimdRegElemLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Q = false;
+ F = false;
+
+ RegisterSize = RegisterSize.Simd64;
+
+ // (Vd & 1) != 0 || Size == 3 are also invalid, but they are checked on encoding.
+ if (Size == 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegLong.cs
new file mode 100644
index 0000000..1349fb4
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRegLong.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRegLong : OpCode32SimdReg
+ {
+ public bool Polynomial { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegLong(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegLong(inst, address, opCode, true);
+
+ public OpCode32SimdRegLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Q = false;
+ RegisterSize = RegisterSize.Simd64;
+
+ Polynomial = ((opCode >> 9) & 0x1) != 0;
+
+ // Subclasses do their own Vx handling before validation, so only check here for the exact base type.
+ if (GetType() == typeof(OpCode32SimdRegLong) && DecoderHelper.VectorArgumentsInvalid(true, Vd))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegS.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegS.cs
new file mode 100644
index 0000000..2dfb007
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRegS.cs
@@ -0,0 +1,23 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRegS : OpCode32SimdS
+ {
+ public int Vn { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegS(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegS(inst, address, opCode, true);
+
+ public OpCode32SimdRegS(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ bool single = Size != 3;
+ if (single)
+ {
+ Vn = ((opCode >> 7) & 0x1) | ((opCode >> 15) & 0x1e);
+ }
+ else
+ {
+ Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegWide.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegWide.cs
new file mode 100644
index 0000000..6f9c639
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRegWide.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRegWide : OpCode32SimdReg
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegWide(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegWide(inst, address, opCode, true);
+
+ public OpCode32SimdRegWide(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Q = false;
+ RegisterSize = RegisterSize.Simd64;
+
+ // Subclasses do their own Vx handling before validation, so only check here for the exact base type.
+ if (GetType() == typeof(OpCode32SimdRegWide) && DecoderHelper.VectorArgumentsInvalid(true, Vd, Vn))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRev.cs b/src/ARMeilleure/Decoders/OpCode32SimdRev.cs
new file mode 100644
index 0000000..26d8be2
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdRev.cs
@@ -0,0 +1,23 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdRev : OpCode32SimdCmpZ
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRev(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRev(inst, address, opCode, true);
+
+ public OpCode32SimdRev(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ if (Opc + Size >= 3)
+ {
+ Instruction = InstDescriptor.Undefined;
+ return;
+ }
+
+ // Currently, this instruction is treated as though its Opc field were the true size,
+ // which lets us reverse vectors on a single-element basis (e.g. bit tricks on an I64 rather than inserting lots of I8s).
+ int tempSize = Size;
+ Size = 3 - Opc; // Opc 0 is 64 bit, 1 is 32, and so on.
+ Opc = tempSize;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdS.cs b/src/ARMeilleure/Decoders/OpCode32SimdS.cs
new file mode 100644
index 0000000..0bb62cb
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdS.cs
@@ -0,0 +1,39 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdS : OpCode32, IOpCode32Simd
+ {
+ public int Vd { get; protected set; }
+ public int Vm { get; protected set; }
+ public int Opc { get; protected set; } // "with_zero" (Opc<1>) [Vcmp, Vcmpe].
+ public int Opc2 { get; } // opc2 or RM (opc2<1:0>) [Vcvt, Vrint].
+ public int Size { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdS(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdS(inst, address, opCode, true);
+
+ public OpCode32SimdS(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Opc = (opCode >> 15) & 0x3;
+ Opc2 = (opCode >> 16) & 0x7;
+
+ Size = (opCode >> 8) & 0x3;
+
+ bool single = Size != 3;
+
+ RegisterSize = single ? RegisterSize.Int32 : RegisterSize.Int64;
+
+ if (single)
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+ Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdSel.cs b/src/ARMeilleure/Decoders/OpCode32SimdSel.cs
new file mode 100644
index 0000000..a6667ba
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdSel.cs
@@ -0,0 +1,23 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdSel : OpCode32SimdRegS
+ {
+ public OpCode32SimdSelMode Cc { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSel(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSel(inst, address, opCode, true);
+
+ public OpCode32SimdSel(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Cc = (OpCode32SimdSelMode)((opCode >> 20) & 3);
+ }
+ }
+
+ enum OpCode32SimdSelMode
+ {
+ Eq = 0,
+ Vs,
+ Ge,
+ Gt,
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdShImm.cs b/src/ARMeilleure/Decoders/OpCode32SimdShImm.cs
new file mode 100644
index 0000000..040dce6
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdShImm.cs
@@ -0,0 +1,46 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdShImm : OpCode32Simd
+ {
+ public int Shift { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImm(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImm(inst, address, opCode, true);
+
+ public OpCode32SimdShImm(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ int imm6 = (opCode >> 16) & 0x3f;
+ int limm6 = ((opCode >> 1) & 0x40) | imm6;
+
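+ // The position of the most significant set bit of L:imm6 selects the element size;
+ // the bits below it give the shift amount.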
+ if ((limm6 & 0x40) == 0b1000000)
+ {
+ Size = 3;
+ Shift = imm6;
+ }
+ else if ((limm6 & 0x60) == 0b0100000)
+ {
+ Size = 2;
+ Shift = imm6 - 32;
+ }
+ else if ((limm6 & 0x70) == 0b0010000)
+ {
+ Size = 1;
+ Shift = imm6 - 16;
+ }
+ else if ((limm6 & 0x78) == 0b0001000)
+ {
+ Size = 0;
+ Shift = imm6 - 8;
+ }
+ else
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+
+ if (GetType() == typeof(OpCode32SimdShImm) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs
new file mode 100644
index 0000000..13d89ca
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs
@@ -0,0 +1,43 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdShImmLong : OpCode32Simd
+ {
+ public int Shift { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmLong(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmLong(inst, address, opCode, true);
+
+ public OpCode32SimdShImmLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Q = false;
+ RegisterSize = RegisterSize.Simd64;
+
+ int imm6 = (opCode >> 16) & 0x3f;
+
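+ // As with the other shift-immediate forms, the top set bit of imm6 selects
+ // the element size and the bits below it give the shift amount.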
+ if ((imm6 & 0x20) == 0b100000)
+ {
+ Size = 2;
+ Shift = imm6 - 32;
+ }
+ else if ((imm6 & 0x30) == 0b010000)
+ {
+ Size = 1;
+ Shift = imm6 - 16;
+ }
+ else if ((imm6 & 0x38) == 0b001000)
+ {
+ Size = 0;
+ Shift = imm6 - 8;
+ }
+ else
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+
+ if (GetType() == typeof(OpCode32SimdShImmLong) && DecoderHelper.VectorArgumentsInvalid(true, Vd))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs b/src/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs
new file mode 100644
index 0000000..ce1e790
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdShImmNarrow : OpCode32SimdShImm
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmNarrow(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmNarrow(inst, address, opCode, true);
+
+ public OpCode32SimdShImmNarrow(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) { }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdSpecial.cs b/src/ARMeilleure/Decoders/OpCode32SimdSpecial.cs
new file mode 100644
index 0000000..9b6f473
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdSpecial.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdSpecial : OpCode32
+ {
+ public int Rt { get; }
+ public int Sreg { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSpecial(inst, address, opCode, false);
+ public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSpecial(inst, address, opCode, true);
+
+ public OpCode32SimdSpecial(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode)
+ {
+ IsThumb = isThumb;
+
+ Rt = (opCode >> 12) & 0xf;
+ Sreg = (opCode >> 16) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdSqrte.cs b/src/ARMeilleure/Decoders/OpCode32SimdSqrte.cs
new file mode 100644
index 0000000..8f8fa4b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdSqrte.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdSqrte : OpCode32Simd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSqrte(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSqrte(inst, address, opCode, true);
+
+ public OpCode32SimdSqrte(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Size = (opCode >> 18) & 0x1;
+ F = ((opCode >> 8) & 0x1) != 0;
+
+ if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdTbl.cs b/src/ARMeilleure/Decoders/OpCode32SimdTbl.cs
new file mode 100644
index 0000000..fcac9e0
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdTbl.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32SimdTbl : OpCode32SimdReg
+ {
+ public int Length { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdTbl(inst, address, opCode, false);
+ public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdTbl(inst, address, opCode, true);
+
+ public OpCode32SimdTbl(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+ {
+ Length = (opCode >> 8) & 3;
+ Size = 0;
+ Opc = Q ? 1 : 0;
+ Q = false;
+ RegisterSize = RegisterSize.Simd64;
+
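+ // The table occupies Length + 1 consecutive D registers starting at Vn,
+ // which must not run past the end of the 32-register file.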
+ if (Vn + Length + 1 > 32)
+ {
+ Instruction = InstDescriptor.Undefined;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32System.cs b/src/ARMeilleure/Decoders/OpCode32System.cs
new file mode 100644
index 0000000..f6f5e0f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32System.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32System : OpCode32
+ {
+ public int Opc1 { get; }
+ public int CRn { get; }
+ public int Rt { get; }
+ public int Opc2 { get; }
+ public int CRm { get; }
+ public int MrrcOp { get; }
+
+ public int Coproc { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32System(inst, address, opCode);
+
+ public OpCode32System(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Opc1 = (opCode >> 21) & 0x7;
+ CRn = (opCode >> 16) & 0xf;
+ Rt = (opCode >> 12) & 0xf;
+ Opc2 = (opCode >> 5) & 0x7;
+ CRm = (opCode >> 0) & 0xf;
+ MrrcOp = (opCode >> 4) & 0xf;
+
+ Coproc = (opCode >> 8) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeAdr.cs b/src/ARMeilleure/Decoders/OpCodeAdr.cs
new file mode 100644
index 0000000..0802804
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAdr.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAdr : OpCode
+ {
+ public int Rd { get; }
+
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAdr(inst, address, opCode);
+
+ public OpCodeAdr(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = opCode & 0x1f;
+
+ Immediate = DecoderHelper.DecodeImmS19_2(opCode);
+ Immediate |= ((long)opCode >> 29) & 3;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeAlu.cs b/src/ARMeilleure/Decoders/OpCodeAlu.cs
new file mode 100644
index 0000000..1619ecd
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAlu.cs
@@ -0,0 +1,23 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAlu : OpCode, IOpCodeAlu
+ {
+ public int Rd { get; protected set; }
+ public int Rn { get; }
+
+ public DataOp DataOp { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAlu(inst, address, opCode);
+
+ public OpCodeAlu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x1f;
+ Rn = (opCode >> 5) & 0x1f;
+ DataOp = (DataOp)((opCode >> 24) & 0x3);
+
+ RegisterSize = (opCode >> 31) != 0
+ ? RegisterSize.Int64
+ : RegisterSize.Int32;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeAluBinary.cs b/src/ARMeilleure/Decoders/OpCodeAluBinary.cs
new file mode 100644
index 0000000..4413581
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAluBinary.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAluBinary : OpCodeAlu
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluBinary(inst, address, opCode);
+
+ public OpCodeAluBinary(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 16) & 0x1f;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeAluImm.cs b/src/ARMeilleure/Decoders/OpCodeAluImm.cs
new file mode 100644
index 0000000..0d2f720
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAluImm.cs
@@ -0,0 +1,40 @@
+using System;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAluImm : OpCodeAlu, IOpCodeAluImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluImm(inst, address, opCode);
+
+ public OpCodeAluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ if (DataOp == DataOp.Arithmetic)
+ {
+ Immediate = (opCode >> 10) & 0xfff;
+
+ int shift = (opCode >> 22) & 3;
+
+ Immediate <<= shift * 12;
+ }
+ else if (DataOp == DataOp.Logical)
+ {
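+ // Logical instructions use the N:immr:imms bitmask immediate encoding.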
+ var bm = DecoderHelper.DecodeBitMask(opCode, true);
+
+ if (bm.IsUndefined)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Immediate = bm.WMask;
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid data operation: {DataOp}", nameof(opCode));
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeAluRs.cs b/src/ARMeilleure/Decoders/OpCodeAluRs.cs
new file mode 100644
index 0000000..47a47e7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAluRs.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAluRs : OpCodeAlu, IOpCodeAluRs
+ {
+ public int Shift { get; }
+ public int Rm { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluRs(inst, address, opCode);
+
+ public OpCodeAluRs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int shift = (opCode >> 10) & 0x3f;
+
+ if (shift >= GetBitsCount())
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Shift = shift;
+
+ Rm = (opCode >> 16) & 0x1f;
+ ShiftType = (ShiftType)((opCode >> 22) & 0x3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeAluRx.cs b/src/ARMeilleure/Decoders/OpCodeAluRx.cs
new file mode 100644
index 0000000..c214867
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeAluRx.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeAluRx : OpCodeAlu, IOpCodeAluRx
+ {
+ public int Shift { get; }
+ public int Rm { get; }
+
+ public IntType IntType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluRx(inst, address, opCode);
+
+ public OpCodeAluRx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Shift = (opCode >> 10) & 0x7;
+ IntType = (IntType)((opCode >> 13) & 0x7);
+ Rm = (opCode >> 16) & 0x1f;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeBImm.cs b/src/ARMeilleure/Decoders/OpCodeBImm.cs
new file mode 100644
index 0000000..2848c14
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBImm.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBImm : OpCode, IOpCodeBImm
+ {
+ public long Immediate { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImm(inst, address, opCode);
+
+ public OpCodeBImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeBImmAl.cs b/src/ARMeilleure/Decoders/OpCodeBImmAl.cs
new file mode 100644
index 0000000..6c4b28c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBImmAl.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBImmAl : OpCodeBImm
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmAl(inst, address, opCode);
+
+ public OpCodeBImmAl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Immediate = (long)address + DecoderHelper.DecodeImm26_2(opCode);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeBImmCmp.cs b/src/ARMeilleure/Decoders/OpCodeBImmCmp.cs
new file mode 100644
index 0000000..c477dde
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBImmCmp.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBImmCmp : OpCodeBImm
+ {
+ public int Rt { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmCmp(inst, address, opCode);
+
+ public OpCodeBImmCmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = opCode & 0x1f;
+
+ Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+
+ RegisterSize = (opCode >> 31) != 0
+ ? RegisterSize.Int64
+ : RegisterSize.Int32;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeBImmCond.cs b/src/ARMeilleure/Decoders/OpCodeBImmCond.cs
new file mode 100644
index 0000000..7a51a07
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBImmCond.cs
@@ -0,0 +1,25 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBImmCond : OpCodeBImm, IOpCodeCond
+ {
+ public Condition Cond { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmCond(inst, address, opCode);
+
+ public OpCodeBImmCond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int o0 = (opCode >> 4) & 1;
+
+ if (o0 != 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Cond = (Condition)(opCode & 0xf);
+
+ Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeBImmTest.cs b/src/ARMeilleure/Decoders/OpCodeBImmTest.cs
new file mode 100644
index 0000000..f989e59
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBImmTest.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBImmTest : OpCodeBImm
+ {
+ public int Rt { get; }
+ public int Bit { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmTest(inst, address, opCode);
+
+ public OpCodeBImmTest(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = opCode & 0x1f;
+
+ Immediate = (long)address + DecoderHelper.DecodeImmS14_2(opCode);
+
+ Bit = (opCode >> 19) & 0x1f;
+ Bit |= (opCode >> 26) & 0x20;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeBReg.cs b/src/ARMeilleure/Decoders/OpCodeBReg.cs
new file mode 100644
index 0000000..3b84cf5
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBReg.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBReg : OpCode
+ {
+ public int Rn { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBReg(inst, address, opCode);
+
+ public OpCodeBReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int op4 = (opCode >> 0) & 0x1f;
+ int op2 = (opCode >> 16) & 0x1f;
+
+ if (op2 != 0b11111 || op4 != 0b00000)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Rn = (opCode >> 5) & 0x1f;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeBfm.cs b/src/ARMeilleure/Decoders/OpCodeBfm.cs
new file mode 100644
index 0000000..d51efad
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeBfm.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeBfm : OpCodeAlu
+ {
+ public long WMask { get; }
+ public long TMask { get; }
+ public int Pos { get; }
+ public int Shift { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBfm(inst, address, opCode);
+
+ public OpCodeBfm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ var bm = DecoderHelper.DecodeBitMask(opCode, false);
+
+ if (bm.IsUndefined)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ WMask = bm.WMask;
+ TMask = bm.TMask;
+ Pos = bm.Pos;
+ Shift = bm.Shift;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeCcmp.cs b/src/ARMeilleure/Decoders/OpCodeCcmp.cs
new file mode 100644
index 0000000..d403534
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeCcmp.cs
@@ -0,0 +1,32 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeCcmp : OpCodeAlu, IOpCodeCond
+ {
+ public int Nzcv { get; }
+ protected int RmImm;
+
+ public Condition Cond { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCcmp(inst, address, opCode);
+
+ public OpCodeCcmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int o3 = (opCode >> 4) & 1;
+
+ if (o3 != 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Nzcv = (opCode >> 0) & 0xf;
+ Cond = (Condition)((opCode >> 12) & 0xf);
+ RmImm = (opCode >> 16) & 0x1f;
+
+ Rd = RegisterAlias.Zr;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeCcmpImm.cs b/src/ARMeilleure/Decoders/OpCodeCcmpImm.cs
new file mode 100644
index 0000000..9d6acf1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeCcmpImm.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeCcmpImm : OpCodeCcmp, IOpCodeAluImm
+ {
+ public long Immediate => RmImm;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCcmpImm(inst, address, opCode);
+
+ public OpCodeCcmpImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeCcmpReg.cs b/src/ARMeilleure/Decoders/OpCodeCcmpReg.cs
new file mode 100644
index 0000000..349afa1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeCcmpReg.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeCcmpReg : OpCodeCcmp, IOpCodeAluRs
+ {
+ public int Rm => RmImm;
+
+ public int Shift => 0;
+
+ public ShiftType ShiftType => ShiftType.Lsl;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCcmpReg(inst, address, opCode);
+
+ public OpCodeCcmpReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeCsel.cs b/src/ARMeilleure/Decoders/OpCodeCsel.cs
new file mode 100644
index 0000000..418962e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeCsel.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeCsel : OpCodeAlu, IOpCodeCond
+ {
+ public int Rm { get; }
+
+ public Condition Cond { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCsel(inst, address, opCode);
+
+ public OpCodeCsel(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 16) & 0x1f;
+ Cond = (Condition)((opCode >> 12) & 0xf);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeException.cs b/src/ARMeilleure/Decoders/OpCodeException.cs
new file mode 100644
index 0000000..eee6364
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeException.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeException : OpCode
+ {
+ public int Id { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeException(inst, address, opCode);
+
+ public OpCodeException(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Id = (opCode >> 5) & 0xffff;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeMem.cs b/src/ARMeilleure/Decoders/OpCodeMem.cs
new file mode 100644
index 0000000..9b4e5ff
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMem.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMem : OpCode
+ {
+ public int Rt { get; protected set; }
+ public int Rn { get; protected set; }
+ public int Size { get; protected set; }
+ public bool Extend64 { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMem(inst, address, opCode);
+
+ public OpCodeMem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 0) & 0x1f;
+ Rn = (opCode >> 5) & 0x1f;
+ Size = (opCode >> 30) & 0x3;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeMemEx.cs b/src/ARMeilleure/Decoders/OpCodeMemEx.cs
new file mode 100644
index 0000000..1dc7314
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMemEx.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMemEx : OpCodeMem
+ {
+ public int Rt2 { get; }
+ public int Rs { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemEx(inst, address, opCode);
+
+ public OpCodeMemEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt2 = (opCode >> 10) & 0x1f;
+ Rs = (opCode >> 16) & 0x1f;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeMemImm.cs b/src/ARMeilleure/Decoders/OpCodeMemImm.cs
new file mode 100644
index 0000000..4d5eeb1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMemImm.cs
@@ -0,0 +1,53 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMemImm : OpCodeMem
+ {
+ public long Immediate { get; protected set; }
+ public bool WBack { get; protected set; }
+ public bool PostIdx { get; protected set; }
+ protected bool Unscaled { get; }
+
+ private enum MemOp
+ {
+ Unscaled = 0,
+ PostIndexed = 1,
+ Unprivileged = 2,
+ PreIndexed = 3,
+ Unsigned,
+ }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemImm(inst, address, opCode);
+
+ public OpCodeMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Extend64 = ((opCode >> 22) & 3) == 2;
+ WBack = ((opCode >> 24) & 1) == 0;
+
+ // The type is not valid for the unsigned 12-bit immediate encoding,
+ // because bits 11:10 are used by the larger immediate offset.
+ MemOp type = WBack ? (MemOp)((opCode >> 10) & 3) : MemOp.Unsigned;
+
+ PostIdx = type == MemOp.PostIndexed;
+ Unscaled = type == MemOp.Unscaled ||
+ type == MemOp.Unprivileged;
+
+ // Unscaled and Unprivileged don't write back,
+ // but they do use the 9-bit signed immediate.
+ if (Unscaled)
+ {
+ WBack = false;
+ }
+
+ if (WBack || Unscaled)
+ {
+ // 9-bit signed immediate.
+ Immediate = (opCode << 11) >> 23;
+ }
+ else
+ {
+ // 12-bit unsigned immediate.
+ Immediate = ((opCode >> 10) & 0xfff) << Size;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeMemLit.cs b/src/ARMeilleure/Decoders/OpCodeMemLit.cs
new file mode 100644
index 0000000..8712a78
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMemLit.cs
@@ -0,0 +1,44 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMemLit : OpCode, IOpCodeLit
+ {
+ public int Rt { get; }
+ public long Immediate { get; }
+ public int Size { get; }
+ public bool Signed { get; }
+ public bool Prefetch { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemLit(inst, address, opCode);
+
+ public OpCodeMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = opCode & 0x1f;
+
+ Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+
+ switch ((opCode >> 30) & 3)
+ {
+ case 0:
+ Size = 2;
+ Signed = false;
+ Prefetch = false;
+ break;
+ case 1:
+ Size = 3;
+ Signed = false;
+ Prefetch = false;
+ break;
+ case 2:
+ Size = 2;
+ Signed = true;
+ Prefetch = false;
+ break;
+ case 3:
+ Size = 0;
+ Signed = false;
+ Prefetch = true;
+ break;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeMemPair.cs b/src/ARMeilleure/Decoders/OpCodeMemPair.cs
new file mode 100644
index 0000000..eb696cf
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMemPair.cs
@@ -0,0 +1,25 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMemPair : OpCodeMemImm
+ {
+ public int Rt2 { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemPair(inst, address, opCode);
+
+ public OpCodeMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt2 = (opCode >> 10) & 0x1f;
+ WBack = ((opCode >> 23) & 0x1) != 0;
+ PostIdx = ((opCode >> 23) & 0x3) == 1;
+ Extend64 = ((opCode >> 30) & 0x3) == 1;
+ Size = ((opCode >> 31) & 0x1) | 2;
+
+ DecodeImm(opCode);
+ }
+
+ protected void DecodeImm(int opCode)
+ {
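+ // Sign-extend the 7-bit immediate at bits 21:15, scaled by the access size
+ // (the arithmetic shift leaves it multiplied by 1 << Size).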
+ Immediate = ((long)(opCode >> 15) << 57) >> (57 - Size);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeMemReg.cs b/src/ARMeilleure/Decoders/OpCodeMemReg.cs
new file mode 100644
index 0000000..9b0d159
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMemReg.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMemReg : OpCodeMem
+ {
+ public bool Shift { get; }
+ public int Rm { get; }
+
+ public IntType IntType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemReg(inst, address, opCode);
+
+ public OpCodeMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Shift = ((opCode >> 12) & 0x1) != 0;
+ IntType = (IntType)((opCode >> 13) & 0x7);
+ Rm = (opCode >> 16) & 0x1f;
+ Extend64 = ((opCode >> 22) & 0x3) == 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeMov.cs b/src/ARMeilleure/Decoders/OpCodeMov.cs
new file mode 100644
index 0000000..a2914b7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMov.cs
@@ -0,0 +1,38 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMov : OpCode
+ {
+ public int Rd { get; }
+
+ public long Immediate { get; }
+
+ public int Bit { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMov(inst, address, opCode);
+
+ public OpCodeMov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int p1 = (opCode >> 22) & 1;
+ int sf = (opCode >> 31) & 1;
+
+ if (sf == 0 && p1 != 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Rd = (opCode >> 0) & 0x1f;
+ Immediate = (opCode >> 5) & 0xffff;
+ Bit = (opCode >> 21) & 0x3;
+
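+ // "hw" selects which 16-bit half the immediate occupies: shift left by hw * 16.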
+ Bit <<= 4;
+
+ Immediate <<= Bit;
+
+ RegisterSize = (opCode >> 31) != 0
+ ? RegisterSize.Int64
+ : RegisterSize.Int32;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeMul.cs b/src/ARMeilleure/Decoders/OpCodeMul.cs
new file mode 100644
index 0000000..9b1dd37
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeMul.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeMul : OpCodeAlu
+ {
+ public int Rm { get; }
+ public int Ra { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMul(inst, address, opCode);
+
+ public OpCodeMul(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Ra = (opCode >> 10) & 0x1f;
+ Rm = (opCode >> 16) & 0x1f;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimd.cs b/src/ARMeilleure/Decoders/OpCodeSimd.cs
new file mode 100644
index 0000000..bd34d74
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimd.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimd : OpCode, IOpCodeSimd
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+ public int Opc { get; }
+ public int Size { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimd(inst, address, opCode);
+
+ public OpCodeSimd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x1f;
+ Rn = (opCode >> 5) & 0x1f;
+ Opc = (opCode >> 15) & 0x3;
+ Size = (opCode >> 22) & 0x3;
+
+ RegisterSize = ((opCode >> 30) & 1) != 0
+ ? RegisterSize.Simd128
+ : RegisterSize.Simd64;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdCvt.cs b/src/ARMeilleure/Decoders/OpCodeSimdCvt.cs
new file mode 100644
index 0000000..e50cf12
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdCvt.cs
@@ -0,0 +1,21 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdCvt : OpCodeSimd
+ {
+ public int FBits { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdCvt(inst, address, opCode);
+
+ public OpCodeSimdCvt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int scale = (opCode >> 10) & 0x3f;
+ int sf = (opCode >> 31) & 0x1;
+
+ FBits = 64 - scale;
+
+ RegisterSize = sf != 0
+ ? RegisterSize.Int64
+ : RegisterSize.Int32;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdExt.cs b/src/ARMeilleure/Decoders/OpCodeSimdExt.cs
new file mode 100644
index 0000000..0a3359e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdExt.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdExt : OpCodeSimdReg
+ {
+ public int Imm4 { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdExt(inst, address, opCode);
+
+ public OpCodeSimdExt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Imm4 = (opCode >> 11) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdFcond.cs b/src/ARMeilleure/Decoders/OpCodeSimdFcond.cs
new file mode 100644
index 0000000..510cd31
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdFcond.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdFcond : OpCodeSimdReg, IOpCodeCond
+ {
+ public int Nzcv { get; }
+
+ public Condition Cond { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdFcond(inst, address, opCode);
+
+ public OpCodeSimdFcond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Nzcv = (opCode >> 0) & 0xf;
+ Cond = (Condition)((opCode >> 12) & 0xf);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdFmov.cs b/src/ARMeilleure/Decoders/OpCodeSimdFmov.cs
new file mode 100644
index 0000000..662abe2
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdFmov.cs
@@ -0,0 +1,32 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdFmov : OpCode, IOpCodeSimd
+ {
+ public int Rd { get; }
+ public long Immediate { get; }
+ public int Size { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdFmov(inst, address, opCode);
+
+ public OpCodeSimdFmov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int type = (opCode >> 22) & 0x3;
+
+ Size = type;
+
+ long imm;
+
+ Rd = (opCode >> 0) & 0x1f;
+ imm = (opCode >> 13) & 0xff;
+
+ if (type == 0)
+ {
+ Immediate = (long)DecoderHelper.Imm8ToFP32Table[(int)imm];
+ }
+ else /* if (type == 1) */
+ {
+ Immediate = (long)DecoderHelper.Imm8ToFP64Table[(int)imm];
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdHelper.cs b/src/ARMeilleure/Decoders/OpCodeSimdHelper.cs
new file mode 100644
index 0000000..b006cc9
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdHelper.cs
@@ -0,0 +1,91 @@
+namespace ARMeilleure.Decoders
+{
+ public static class OpCodeSimdHelper
+ {
+ public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm)
+ {
+ int modeLow = cMode & 1;
+ int modeHigh = cMode >> 1;
+ int size = 0;
+
+ if (modeHigh == 0b111)
+ {
+ switch (op | (modeLow << 1))
+ {
+ case 0:
+ // 64-bit immediate.
+ // Transform abcd efgh into abcd efgh abcd efgh ...
+ size = 3;
+ imm = (long)((ulong)imm * 0x0101010101010101);
+ break;
+
+ case 1:
+ // 64-bit immediate.
+ // Transform abcd efgh into aaaa aaaa bbbb bbbb ...
+ size = 3;
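+ // The three swaps bit-reverse the low byte; the multiply/mask then moves
+ // each original bit to the MSB of its own byte, and the final shifts
+ // replicate that bit across the whole byte.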
+ imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4;
+ imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2;
+ imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1;
+
+ imm = (long)((ulong)imm * 0x8040201008040201);
+ imm = (long)((ulong)imm & 0x8080808080808080);
+
+ imm |= imm >> 4;
+ imm |= imm >> 2;
+ imm |= imm >> 1;
+ break;
+
+ case 2:
+ // 2 x 32-bit floating-point immediate.
+ size = 3;
+ imm = (long)DecoderHelper.Imm8ToFP32Table[(int)imm];
+ imm |= imm << 32;
+ break;
+
+ case 3:
+ // 64-bit floating-point immediate.
+ size = 3;
+ imm = (long)DecoderHelper.Imm8ToFP64Table[(int)imm];
+ break;
+ }
+ }
+ else if ((modeHigh & 0b110) == 0b100)
+ {
+ // 16-bit shifted immediate.
+ size = 1;
+ imm <<= (modeHigh & 1) << 3;
+ }
+ else if ((modeHigh & 0b100) == 0b000)
+ {
+ // 32-bit shifted immediate.
+ size = 2;
+ imm <<= modeHigh << 3;
+ }
+ else if ((modeHigh & 0b111) == 0b110)
+ {
+ // 32-bit shifted immediate (fill with ones).
+ size = 2;
+ imm = ShlOnes(imm, 8 << modeLow);
+ }
+ else
+ {
+ // 8-bit immediate without shift.
+ size = 0;
+ }
+
+ return (imm, size);
+ }
+
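+ // Shift left, filling the vacated low bits with ones.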
+ private static long ShlOnes(long value, int shift)
+ {
+ if (shift != 0)
+ {
+ return value << shift | (long)(ulong.MaxValue >> (64 - shift));
+ }
+ else
+ {
+ return value;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdImm.cs b/src/ARMeilleure/Decoders/OpCodeSimdImm.cs
new file mode 100644
index 0000000..3f4bad7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdImm.cs
@@ -0,0 +1,110 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdImm : OpCode, IOpCodeSimd
+ {
+ public int Rd { get; }
+ public long Immediate { get; }
+ public int Size { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdImm(inst, address, opCode);
+
+ public OpCodeSimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = opCode & 0x1f;
+
+ int cMode = (opCode >> 12) & 0xf;
+ int op = (opCode >> 29) & 0x1;
+
+ int modeLow = cMode & 1;
+ int modeHigh = cMode >> 1;
+
+ long imm;
+
+ imm = ((uint)opCode >> 5) & 0x1f;
+ imm |= ((uint)opCode >> 11) & 0xe0;
+
+ if (modeHigh == 0b111)
+ {
+ switch (op | (modeLow << 1))
+ {
+ case 0:
+ // 64-bit immediate.
+ // Transform abcd efgh into abcd efgh abcd efgh ...
+ Size = 3;
+ imm = (long)((ulong)imm * 0x0101010101010101);
+ break;
+
+ case 1:
+ // 64-bit immediate.
+ // Transform abcd efgh into aaaa aaaa bbbb bbbb ...
+ Size = 3;
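+ // Same trick as OpCodeSimdHelper: bit-reverse the low byte, move each bit
+ // to the MSB of its own byte, then replicate it across the byte.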
+ imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4;
+ imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2;
+ imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1;
+
+ imm = (long)((ulong)imm * 0x8040201008040201);
+ imm = (long)((ulong)imm & 0x8080808080808080);
+
+ imm |= imm >> 4;
+ imm |= imm >> 2;
+ imm |= imm >> 1;
+ break;
+
+ case 2:
+ // 2 x 32-bit floating-point immediate.
+ Size = 0;
+ imm = (long)DecoderHelper.Imm8ToFP32Table[(int)imm];
+ imm |= imm << 32;
+ break;
+
+ case 3:
+ // 64-bit floating-point immediate.
+ Size = 1;
+ imm = (long)DecoderHelper.Imm8ToFP64Table[(int)imm];
+ break;
+ }
+ }
+ else if ((modeHigh & 0b110) == 0b100)
+ {
+ // 16-bit shifted immediate.
+ Size = 1;
+ imm <<= (modeHigh & 1) << 3;
+ }
+ else if ((modeHigh & 0b100) == 0b000)
+ {
+ // 32-bit shifted immediate.
+ Size = 2;
+ imm <<= modeHigh << 3;
+ }
+ else if ((modeHigh & 0b111) == 0b110)
+ {
+ // 32-bit shifted immediate (fill with ones).
+ Size = 2;
+ imm = ShlOnes(imm, 8 << modeLow);
+ }
+ else
+ {
+ // 8-bit immediate without shift.
+ Size = 0;
+ }
+
+ Immediate = imm;
+
+ RegisterSize = ((opCode >> 30) & 1) != 0
+ ? RegisterSize.Simd128
+ : RegisterSize.Simd64;
+ }
+
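+ // Shift left, filling the vacated low bits with ones.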
+ private static long ShlOnes(long value, int shift)
+ {
+ if (shift != 0)
+ {
+ return value << shift | (long)(ulong.MaxValue >> (64 - shift));
+ }
+ else
+ {
+ return value;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdIns.cs b/src/ARMeilleure/Decoders/OpCodeSimdIns.cs
new file mode 100644
index 0000000..9543687
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdIns.cs
@@ -0,0 +1,44 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdIns : OpCodeSimd
+ {
+ public int SrcIndex { get; }
+ public int DstIndex { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdIns(inst, address, opCode);
+
+ public OpCodeSimdIns(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int imm4 = (opCode >> 11) & 0xf;
+ int imm5 = (opCode >> 16) & 0x1f;
+
+ if (imm5 == 0b10000)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
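+ // imm5 & -imm5 isolates the lowest set bit, which encodes the element size;
+ // the switch below converts it to a log2 size code.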
+ Size = imm5 & -imm5;
+
+ switch (Size)
+ {
+ case 1:
+ Size = 0;
+ break;
+ case 2:
+ Size = 1;
+ break;
+ case 4:
+ Size = 2;
+ break;
+ case 8:
+ Size = 3;
+ break;
+ }
+
+ SrcIndex = imm4 >> Size;
+ DstIndex = imm5 >> (Size + 1);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemImm.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemImm.cs
new file mode 100644
index 0000000..14a9d7c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemImm.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemImm : OpCodeMemImm, IOpCodeSimd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemImm(inst, address, opCode);
+
+ public OpCodeSimdMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Size |= (opCode >> 21) & 4;
+
+ if (Size > 4)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ // The base class already shifts the immediate; we only need to shift it
+ // again when the size (scale) is 4, since that value is only set here.
+ if (!WBack && !Unscaled && Size == 4)
+ {
+ Immediate <<= 4;
+ }
+
+ Extend64 = false;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemLit.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemLit.cs
new file mode 100644
index 0000000..efa558b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemLit.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemLit : OpCode, IOpCodeSimd, IOpCodeLit
+ {
+ public int Rt { get; }
+ public long Immediate { get; }
+ public int Size { get; }
+ public bool Signed => false;
+ public bool Prefetch => false;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemLit(inst, address, opCode);
+
+ public OpCodeSimdMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int opc = (opCode >> 30) & 3;
+
+ if (opc == 3)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Rt = opCode & 0x1f;
+
+ Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+
+ Size = opc + 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemMs.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemMs.cs
new file mode 100644
index 0000000..c05b524
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemMs.cs
@@ -0,0 +1,71 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemMs : OpCodeMemReg, IOpCodeSimd
+ {
+ public int Reps { get; }
+ public int SElems { get; }
+ public int Elems { get; }
+ public bool WBack { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemMs(inst, address, opCode);
+
+ public OpCodeSimdMemMs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ switch ((opCode >> 12) & 0xf)
+ {
+ case 0b0000:
+ Reps = 1;
+ SElems = 4;
+ break;
+ case 0b0010:
+ Reps = 4;
+ SElems = 1;
+ break;
+ case 0b0100:
+ Reps = 1;
+ SElems = 3;
+ break;
+ case 0b0110:
+ Reps = 3;
+ SElems = 1;
+ break;
+ case 0b0111:
+ Reps = 1;
+ SElems = 1;
+ break;
+ case 0b1000:
+ Reps = 1;
+ SElems = 2;
+ break;
+ case 0b1010:
+ Reps = 2;
+ SElems = 1;
+ break;
+
+ default:
+ Instruction = InstDescriptor.Undefined;
+ return;
+ }
+
+ Size = (opCode >> 10) & 3;
+ WBack = ((opCode >> 23) & 1) != 0;
+
+ bool q = ((opCode >> 30) & 1) != 0;
+
+ if (!q && Size == 3 && SElems != 1)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Extend64 = false;
+
+ RegisterSize = q
+ ? RegisterSize.Simd128
+ : RegisterSize.Simd64;
+
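+ // Elements per register: register size in bytes divided by element size.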
+ Elems = (GetBitsCount() >> 3) >> Size;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemPair.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemPair.cs
new file mode 100644
index 0000000..6971638
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemPair.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemPair : OpCodeMemPair, IOpCodeSimd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemPair(inst, address, opCode);
+
+ public OpCodeSimdMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Size = ((opCode >> 30) & 3) + 2;
+
+ Extend64 = false;
+
+ DecodeImm(opCode);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemReg.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemReg.cs
new file mode 100644
index 0000000..be7b25b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemReg.cs
@@ -0,0 +1,21 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemReg : OpCodeMemReg, IOpCodeSimd
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemReg(inst, address, opCode);
+
+ public OpCodeSimdMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Size |= (opCode >> 21) & 4;
+
+ if (Size > 4)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ Extend64 = false;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemSs.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemSs.cs
new file mode 100644
index 0000000..5bc614e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdMemSs.cs
@@ -0,0 +1,97 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdMemSs : OpCodeMemReg, IOpCodeSimd
+ {
+ public int SElems { get; }
+ public int Index { get; }
+ public bool Replicate { get; }
+ public bool WBack { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemSs(inst, address, opCode);
+
+ public OpCodeSimdMemSs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int size = (opCode >> 10) & 3;
+ int s = (opCode >> 12) & 1;
+ int sElems = (opCode >> 12) & 2;
+ int scale = (opCode >> 14) & 3;
+ int l = (opCode >> 22) & 1;
+ int q = (opCode >> 30) & 1;
+
+ sElems |= (opCode >> 21) & 1;
+
+ sElems++;
+
+ int index = (q << 3) | (s << 2) | size;
+
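+ // Q:S:size initially forms a byte-granular index; the cases below scale it
+ // down to an element index for the selected element size.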
+ switch (scale)
+ {
+ case 1:
+ {
+ if ((size & 1) != 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ index >>= 1;
+
+ break;
+ }
+
+ case 2:
+ {
+ if ((size & 2) != 0 ||
+ ((size & 1) != 0 && s != 0))
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ if ((size & 1) != 0)
+ {
+ index >>= 3;
+
+ scale = 3;
+ }
+ else
+ {
+ index >>= 2;
+ }
+
+ break;
+ }
+
+ case 3:
+ {
+ if (l == 0 || s != 0)
+ {
+ Instruction = InstDescriptor.Undefined;
+
+ return;
+ }
+
+ scale = size;
+
+ Replicate = true;
+
+ break;
+ }
+ }
+
+ Index = index;
+ SElems = sElems;
+ Size = scale;
+
+ Extend64 = false;
+
+ WBack = ((opCode >> 23) & 1) != 0;
+
+ RegisterSize = q != 0
+ ? RegisterSize.Simd128
+ : RegisterSize.Simd64;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdReg.cs b/src/ARMeilleure/Decoders/OpCodeSimdReg.cs
new file mode 100644
index 0000000..40f9b1c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdReg.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdReg : OpCodeSimd
+ {
+ public bool Bit3 { get; }
+ public int Ra { get; }
+ public int Rm { get; protected set; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdReg(inst, address, opCode);
+
+ public OpCodeSimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Bit3 = ((opCode >> 3) & 0x1) != 0;
+ Ra = (opCode >> 10) & 0x1f;
+ Rm = (opCode >> 16) & 0x1f;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdRegElem.cs b/src/ARMeilleure/Decoders/OpCodeSimdRegElem.cs
new file mode 100644
index 0000000..bb248ab
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdRegElem.cs
@@ -0,0 +1,33 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdRegElem : OpCodeSimdReg
+ {
+ public int Index { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdRegElem(inst, address, opCode);
+
+ public OpCodeSimdRegElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ switch (Size)
+ {
+ case 1:
+ Index = (opCode >> 20) & 3 |
+ (opCode >> 9) & 4;
+
+ Rm &= 0xf;
+
+ break;
+
+ case 2:
+ Index = (opCode >> 21) & 1 |
+ (opCode >> 10) & 2;
+
+ break;
+
+ default:
+ Instruction = InstDescriptor.Undefined;
+ break;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs b/src/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs
new file mode 100644
index 0000000..c97bd78
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs
@@ -0,0 +1,35 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdRegElemF : OpCodeSimdReg
+ {
+ public int Index { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdRegElemF(inst, address, opCode);
+
+ public OpCodeSimdRegElemF(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ switch ((opCode >> 21) & 3) // sz:L
+ {
+ case 0: // H:0
+ Index = (opCode >> 10) & 2; // 0, 2
+
+ break;
+
+ case 1: // H:1
+ Index = (opCode >> 10) & 2;
+ Index++; // 1, 3
+
+ break;
+
+ case 2: // H
+ Index = (opCode >> 11) & 1; // 0, 1
+
+ break;
+
+ default:
+ Instruction = InstDescriptor.Undefined;
+ break;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdShImm.cs b/src/ARMeilleure/Decoders/OpCodeSimdShImm.cs
new file mode 100644
index 0000000..7064f1d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdShImm.cs
@@ -0,0 +1,18 @@
+using ARMeilleure.Common;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdShImm : OpCodeSimd
+ {
+ public int Imm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdShImm(inst, address, opCode);
+
+ public OpCodeSimdShImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Imm = (opCode >> 16) & 0x7f;
+
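+ // The highest set bit of immh (Imm >> 3) gives the element size.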
+ Size = BitUtils.HighestBitSetNibble(Imm >> 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSimdTbl.cs b/src/ARMeilleure/Decoders/OpCodeSimdTbl.cs
new file mode 100644
index 0000000..3a7ef6a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSimdTbl.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSimdTbl : OpCodeSimdReg
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdTbl(inst, address, opCode);
+
+ public OpCodeSimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Size = ((opCode >> 13) & 3) + 1;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeSystem.cs b/src/ARMeilleure/Decoders/OpCodeSystem.cs
new file mode 100644
index 0000000..2151341
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeSystem.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeSystem : OpCode
+ {
+ public int Rt { get; }
+ public int Op2 { get; }
+ public int CRm { get; }
+ public int CRn { get; }
+ public int Op1 { get; }
+ public int Op0 { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSystem(inst, address, opCode);
+
+ public OpCodeSystem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 0) & 0x1f;
+ Op2 = (opCode >> 5) & 0x7;
+ CRm = (opCode >> 8) & 0xf;
+ CRn = (opCode >> 12) & 0xf;
+ Op1 = (opCode >> 16) & 0x7;
+ Op0 = ((opCode >> 19) & 0x1) | 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16.cs b/src/ARMeilleure/Decoders/OpCodeT16.cs
new file mode 100644
index 0000000..de946b9
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16 : OpCode32
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16(inst, address, opCode);
+
+ public OpCodeT16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Cond = Condition.Al;
+
+ IsThumb = true;
+ OpCodeSizeInBytes = 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AddSubImm3.cs b/src/ARMeilleure/Decoders/OpCodeT16AddSubImm3.cs
new file mode 100644
index 0000000..683d638
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AddSubImm3.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AddSubImm3 : OpCodeT16, IOpCode32AluImm
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => null;
+
+ public int Immediate { get; }
+
+ public bool IsRotated { get; }
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AddSubImm3(inst, address, opCode);
+
+ public OpCodeT16AddSubImm3(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rn = (opCode >> 3) & 0x7;
+ Immediate = (opCode >> 6) & 0x7;
+ IsRotated = false;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AddSubReg.cs b/src/ARMeilleure/Decoders/OpCodeT16AddSubReg.cs
new file mode 100644
index 0000000..201fc8a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AddSubReg.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AddSubReg : OpCodeT16, IOpCode32AluReg
+ {
+ public int Rm { get; }
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => null;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AddSubReg(inst, address, opCode);
+
+ public OpCodeT16AddSubReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rn = (opCode >> 3) & 0x7;
+ Rm = (opCode >> 6) & 0x7;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AddSubSp.cs b/src/ARMeilleure/Decoders/OpCodeT16AddSubSp.cs
new file mode 100644
index 0000000..b66fe0c
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AddSubSp.cs
@@ -0,0 +1,23 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AddSubSp : OpCodeT16, IOpCode32AluImm
+ {
+ public int Rd => RegisterAlias.Aarch32Sp;
+ public int Rn => RegisterAlias.Aarch32Sp;
+
+ public bool? SetFlags => false;
+
+ public int Immediate { get; }
+
+ public bool IsRotated => false;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AddSubSp(inst, address, opCode);
+
+ public OpCodeT16AddSubSp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Immediate = ((opCode >> 0) & 0x7f) << 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16Adr.cs b/src/ARMeilleure/Decoders/OpCodeT16Adr.cs
new file mode 100644
index 0000000..03abd49
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16Adr.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16Adr : OpCodeT16, IOpCode32Adr
+ {
+ public int Rd { get; }
+
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16Adr(inst, address, opCode);
+
+ public OpCodeT16Adr(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 8) & 7;
+
+ int imm = (opCode & 0xff) << 2;
+ Immediate = (int)(GetPc() & 0xfffffffc) + imm;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluImm8.cs b/src/ARMeilleure/Decoders/OpCodeT16AluImm8.cs
new file mode 100644
index 0000000..122698d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AluImm8.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AluImm8 : OpCodeT16, IOpCode32AluImm
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => null;
+
+ public int Immediate { get; }
+
+ public bool IsRotated { get; }
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluImm8(inst, address, opCode);
+
+ public OpCodeT16AluImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 8) & 0x7;
+ Rn = (opCode >> 8) & 0x7;
+ Immediate = (opCode >> 0) & 0xff;
+ IsRotated = false;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluImmZero.cs b/src/ARMeilleure/Decoders/OpCodeT16AluImmZero.cs
new file mode 100644
index 0000000..f67a75f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AluImmZero.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AluImmZero : OpCodeT16, IOpCode32AluImm
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => null;
+
+ public int Immediate { get; }
+
+ public bool IsRotated { get; }
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluImmZero(inst, address, opCode);
+
+ public OpCodeT16AluImmZero(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rn = (opCode >> 3) & 0x7;
+ Immediate = 0;
+ IsRotated = false;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluRegHigh.cs b/src/ARMeilleure/Decoders/OpCodeT16AluRegHigh.cs
new file mode 100644
index 0000000..5458f65
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AluRegHigh.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AluRegHigh : OpCodeT16, IOpCode32AluReg
+ {
+ public int Rm { get; }
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => false;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluRegHigh(inst, address, opCode);
+
+ public OpCodeT16AluRegHigh(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = ((opCode >> 0) & 0x7) | ((opCode >> 4) & 0x8);
+ Rn = ((opCode >> 0) & 0x7) | ((opCode >> 4) & 0x8);
+ Rm = (opCode >> 3) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluRegLow.cs b/src/ARMeilleure/Decoders/OpCodeT16AluRegLow.cs
new file mode 100644
index 0000000..f86f48b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AluRegLow.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AluRegLow : OpCodeT16, IOpCode32AluReg
+ {
+ public int Rm { get; }
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => null;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluRegLow(inst, address, opCode);
+
+ public OpCodeT16AluRegLow(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rn = (opCode >> 0) & 0x7;
+ Rm = (opCode >> 3) & 0x7;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluUx.cs b/src/ARMeilleure/Decoders/OpCodeT16AluUx.cs
new file mode 100644
index 0000000..11d3a8f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16AluUx.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16AluUx : OpCodeT16, IOpCode32AluUx
+ {
+ public int Rm { get; }
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags => false;
+
+ public int RotateBits => 0;
+ public bool Add => false;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluUx(inst, address, opCode);
+
+ public OpCodeT16AluUx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rm = (opCode >> 3) & 0x7;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16BImm11.cs b/src/ARMeilleure/Decoders/OpCodeT16BImm11.cs
new file mode 100644
index 0000000..5ed8a4e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16BImm11.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16BImm11 : OpCodeT16, IOpCode32BImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BImm11(inst, address, opCode);
+
+ public OpCodeT16BImm11(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
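+ // Sign-extend the 11-bit immediate and scale it by 2 (halfword units).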
+ int imm = (opCode << 21) >> 20;
+ Immediate = GetPc() + imm;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16BImm8.cs b/src/ARMeilleure/Decoders/OpCodeT16BImm8.cs
new file mode 100644
index 0000000..85318e5
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16BImm8.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16BImm8 : OpCodeT16, IOpCode32BImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BImm8(inst, address, opCode);
+
+ public OpCodeT16BImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Cond = (Condition)((opCode >> 8) & 0xf);
+
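+ // Sign-extend the 8-bit immediate and scale it by 2 (halfword units).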
+ int imm = (opCode << 24) >> 23;
+ Immediate = GetPc() + imm;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16BImmCmp.cs b/src/ARMeilleure/Decoders/OpCodeT16BImmCmp.cs
new file mode 100644
index 0000000..68ebac7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16BImmCmp.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16BImmCmp : OpCodeT16, IOpCode32BImm
+ {
+ public int Rn { get; }
+
+ public long Immediate { get; }
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BImmCmp(inst, address, opCode);
+
+ public OpCodeT16BImmCmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 0) & 0x7;
+
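+ // CBZ/CBNZ offset: imm5 (bits 7:3) scaled by 2, with the i bit (bit 9) contributing bit 6; always a forward branch.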
+ int imm = ((opCode >> 2) & 0x3e) | ((opCode >> 3) & 0x40);
+ Immediate = (int)GetPc() + imm;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16BReg.cs b/src/ARMeilleure/Decoders/OpCodeT16BReg.cs
new file mode 100644
index 0000000..da2a007
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16BReg.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16BReg : OpCodeT16, IOpCode32BReg
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BReg(inst, address, opCode);
+
+ public OpCodeT16BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 3) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16Exception.cs b/src/ARMeilleure/Decoders/OpCodeT16Exception.cs
new file mode 100644
index 0000000..8ccdf09
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16Exception.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16Exception : OpCodeT16, IOpCode32Exception
+ {
+ public int Id { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16Exception(inst, address, opCode);
+
+ public OpCodeT16Exception(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Id = opCode & 0xFF;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16IfThen.cs b/src/ARMeilleure/Decoders/OpCodeT16IfThen.cs
new file mode 100644
index 0000000..ea435a7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16IfThen.cs
@@ -0,0 +1,33 @@
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16IfThen : OpCodeT16
+ {
+ public Condition[] IfThenBlockConds { get; }
+
+ public int IfThenBlockSize { get { return IfThenBlockConds.Length; } }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16IfThen(inst, address, opCode);
+
+ public OpCodeT16IfThen(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ List<Condition> conds = new();
+
+ int cond = (opCode >> 4) & 0xf;
+ int mask = opCode & 0xf;
+
+ conds.Add((Condition)cond);
+
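+ // Each subsequent mask bit replaces the condition's LSB, selecting either the base condition or its inverse for the remaining instructions in the IT block.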
+ while ((mask & 7) != 0)
+ {
+ int newLsb = (mask >> 3) & 1;
+ cond = (cond & 0xe) | newLsb;
+ mask <<= 1;
+ conds.Add((Condition)cond);
+ }
+
+ IfThenBlockConds = conds.ToArray();
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemImm5.cs b/src/ARMeilleure/Decoders/OpCodeT16MemImm5.cs
new file mode 100644
index 0000000..e9b3839
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemImm5.cs
@@ -0,0 +1,48 @@
+using ARMeilleure.Instructions;
+using System;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemImm5 : OpCodeT16, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rn { get; }
+
+ public bool WBack => false;
+ public bool IsLoad { get; }
+ public bool Index => true;
+ public bool Add => true;
+
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemImm5(inst, address, opCode);
+
+ public OpCodeT16MemImm5(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 0) & 7;
+ Rn = (opCode >> 3) & 7;
+
+ switch (inst.Name)
+ {
+ case InstName.Ldr:
+ case InstName.Ldrb:
+ case InstName.Ldrh:
+ IsLoad = true;
+ break;
+ case InstName.Str:
+ case InstName.Strb:
+ case InstName.Strh:
+ IsLoad = false;
+ break;
+ }
+
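+ // imm5 is scaled by the access size: words by 4, halfwords by 2, bytes unscaled.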
+ Immediate = inst.Name switch
+ {
+ InstName.Str or InstName.Ldr => ((opCode >> 6) & 0x1f) << 2,
+ InstName.Strb or InstName.Ldrb => ((opCode >> 6) & 0x1f),
+ InstName.Strh or InstName.Ldrh => ((opCode >> 6) & 0x1f) << 1,
+ _ => throw new InvalidOperationException(),
+ };
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemLit.cs b/src/ARMeilleure/Decoders/OpCodeT16MemLit.cs
new file mode 100644
index 0000000..63a452a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemLit.cs
@@ -0,0 +1,26 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemLit : OpCodeT16, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rn => RegisterAlias.Aarch32Pc;
+
+ public bool WBack => false;
+ public bool IsLoad => true;
+ public bool Index => true;
+ public bool Add => true;
+
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemLit(inst, address, opCode);
+
+ public OpCodeT16MemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 8) & 7;
+
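+ // PC-relative load: the 8-bit offset is in word units.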
+ Immediate = (opCode & 0xff) << 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemMult.cs b/src/ARMeilleure/Decoders/OpCodeT16MemMult.cs
new file mode 100644
index 0000000..92b027a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemMult.cs
@@ -0,0 +1,34 @@
+using ARMeilleure.Instructions;
+using System;
+using System.Numerics;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemMult : OpCodeT16, IOpCode32MemMult
+ {
+ public int Rn { get; }
+ public int RegisterMask { get; }
+ public int PostOffset { get; }
+ public bool IsLoad { get; }
+ public int Offset { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemMult(inst, address, opCode);
+
+ public OpCodeT16MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ RegisterMask = opCode & 0xff;
+ Rn = (opCode >> 8) & 7;
+
+ int regCount = BitOperations.PopCount((uint)RegisterMask);
+
+ Offset = 0;
+ PostOffset = 4 * regCount;
+ IsLoad = inst.Name switch
+ {
+ InstName.Ldm => true,
+ InstName.Stm => false,
+ _ => throw new InvalidOperationException(),
+ };
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemReg.cs b/src/ARMeilleure/Decoders/OpCodeT16MemReg.cs
new file mode 100644
index 0000000..17d6966
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemReg.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemReg : OpCodeT16, IOpCode32MemReg
+ {
+ public int Rm { get; }
+ public int Rt { get; }
+ public int Rn { get; }
+
+ public bool WBack => false;
+ public bool IsLoad { get; }
+ public bool Index => true;
+ public bool Add => true;
+
+ public int Immediate => throw new System.InvalidOperationException();
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemReg(inst, address, opCode);
+
+ public OpCodeT16MemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 0) & 7;
+ Rn = (opCode >> 3) & 7;
+ Rm = (opCode >> 6) & 7;
+
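+ // Bits 11:9 select the operation: 0-2 are stores (STR, STRH, STRB), 3-7 are loads.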
+ IsLoad = ((opCode >> 9) & 7) >= 3;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemSp.cs b/src/ARMeilleure/Decoders/OpCodeT16MemSp.cs
new file mode 100644
index 0000000..ed42679
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemSp.cs
@@ -0,0 +1,28 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemSp : OpCodeT16, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rn => RegisterAlias.Aarch32Sp;
+
+ public bool WBack => false;
+ public bool IsLoad { get; }
+ public bool Index => true;
+ public bool Add => true;
+
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemSp(inst, address, opCode);
+
+ public OpCodeT16MemSp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 8) & 7;
+
+ IsLoad = ((opCode >> 11) & 1) != 0;
+
+ Immediate = ((opCode >> 0) & 0xff) << 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemStack.cs b/src/ARMeilleure/Decoders/OpCodeT16MemStack.cs
new file mode 100644
index 0000000..28d5db4
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16MemStack.cs
@@ -0,0 +1,42 @@
+using ARMeilleure.Instructions;
+using ARMeilleure.State;
+using System;
+using System.Numerics;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16MemStack : OpCodeT16, IOpCode32MemMult
+ {
+ public int Rn => RegisterAlias.Aarch32Sp;
+ public int RegisterMask { get; }
+ public int PostOffset { get; }
+ public bool IsLoad { get; }
+ public int Offset { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemStack(inst, address, opCode);
+
+ public OpCodeT16MemStack(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int extra = (opCode >> 8) & 1;
+ int regCount = BitOperations.PopCount((uint)opCode & 0x1ff);
+
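+ // The extra bit adds LR (bit 14) to the PUSH register list or PC (bit 15) to the POP list.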
+ switch (inst.Name)
+ {
+ case InstName.Push:
+ RegisterMask = (opCode & 0xff) | (extra << 14);
+ IsLoad = false;
+ Offset = -4 * regCount;
+ PostOffset = -4 * regCount;
+ break;
+ case InstName.Pop:
+ RegisterMask = (opCode & 0xff) | (extra << 15);
+ IsLoad = true;
+ Offset = 0;
+ PostOffset = 4 * regCount;
+ break;
+ default:
+ throw new InvalidOperationException();
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16ShiftImm.cs b/src/ARMeilleure/Decoders/OpCodeT16ShiftImm.cs
new file mode 100644
index 0000000..18e7b9e
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16ShiftImm.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16ShiftImm : OpCodeT16, IOpCode32AluRsImm
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+
+ public int Immediate { get; }
+ public ShiftType ShiftType { get; }
+
+ public bool? SetFlags => null;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16ShiftImm(inst, address, opCode);
+
+ public OpCodeT16ShiftImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0x7;
+ Rm = (opCode >> 3) & 0x7;
+ Immediate = (opCode >> 6) & 0x1F;
+ ShiftType = (ShiftType)((opCode >> 11) & 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16ShiftReg.cs b/src/ARMeilleure/Decoders/OpCodeT16ShiftReg.cs
new file mode 100644
index 0000000..ce47dfb
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16ShiftReg.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16ShiftReg : OpCodeT16, IOpCode32AluRsReg
+ {
+ public int Rm { get; }
+ public int Rs { get; }
+ public int Rd { get; }
+
+ public int Rn { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public bool? SetFlags => null;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16ShiftReg(inst, address, opCode);
+
+ public OpCodeT16ShiftReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 7;
+ Rm = (opCode >> 0) & 7;
+ Rn = (opCode >> 3) & 7;
+ Rs = (opCode >> 3) & 7;
+
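+ // The shift type is reassembled from bits 6 and 8 of the data-processing opcode field.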
+ ShiftType = (ShiftType)(((opCode >> 6) & 1) | ((opCode >> 7) & 2));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT16SpRel.cs b/src/ARMeilleure/Decoders/OpCodeT16SpRel.cs
new file mode 100644
index 0000000..d737f5b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT16SpRel.cs
@@ -0,0 +1,24 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT16SpRel : OpCodeT16, IOpCode32AluImm
+ {
+ public int Rd { get; }
+ public int Rn => RegisterAlias.Aarch32Sp;
+
+ public bool? SetFlags => false;
+
+ public int Immediate { get; }
+
+ public bool IsRotated => false;
+
+ public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16SpRel(inst, address, opCode);
+
+ public OpCodeT16SpRel(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 8) & 0x7;
+ Immediate = ((opCode >> 0) & 0xff) << 2;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32.cs b/src/ARMeilleure/Decoders/OpCodeT32.cs
new file mode 100644
index 0000000..87a0520
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32 : OpCode32
+ {
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32(inst, address, opCode);
+
+ public OpCodeT32(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Cond = Condition.Al;
+
+ IsThumb = true;
+ OpCodeSizeInBytes = 4;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32Alu.cs b/src/ARMeilleure/Decoders/OpCodeT32Alu.cs
new file mode 100644
index 0000000..cdef007
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32Alu.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32Alu : OpCodeT32, IOpCode32Alu
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public bool? SetFlags { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32Alu(inst, address, opCode);
+
+ public OpCodeT32Alu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 8) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ SetFlags = ((opCode >> 20) & 1) != 0;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluBf.cs b/src/ARMeilleure/Decoders/OpCodeT32AluBf.cs
new file mode 100644
index 0000000..57ad422
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluBf.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluBf : OpCodeT32, IOpCode32AluBf
+ {
+ public int Rd { get; }
+ public int Rn { get; }
+
+ public int Msb { get; }
+ public int Lsb { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluBf(inst, address, opCode);
+
+ public OpCodeT32AluBf(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 8) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
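+ // Lsb is assembled from imm2 (bits 7:6) and imm3 (bits 14:12).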
+ Msb = (opCode >> 0) & 0x1f;
+ Lsb = ((opCode >> 6) & 0x3) | ((opCode >> 10) & 0x1c);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluImm.cs b/src/ARMeilleure/Decoders/OpCodeT32AluImm.cs
new file mode 100644
index 0000000..ce88964
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluImm.cs
@@ -0,0 +1,38 @@
+using ARMeilleure.Common;
+using System.Runtime.Intrinsics;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluImm : OpCodeT32Alu, IOpCode32AluImm
+ {
+ public int Immediate { get; }
+
+ public bool IsRotated { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluImm(inst, address, opCode);
+
+ private static readonly Vector128<int> _factor = Vector128.Create(1, 0x00010001, 0x01000100, 0x01010101);
+
+ public OpCodeT32AluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ int imm8 = (opCode >> 0) & 0xff;
+ int imm3 = (opCode >> 12) & 7;
+ int imm1 = (opCode >> 26) & 1;
+
+ int imm12 = imm8 | (imm3 << 8) | (imm1 << 11);
+
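+ // ThumbExpandImm: when imm12<11:10> == 00, imm8 is replicated into the byte lanes selected by imm12<9:8> (the _factor multipliers); otherwise 0x80 | imm12<6:0> is rotated right by imm12<11:7>.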
+ if ((imm12 >> 10) == 0)
+ {
+ Immediate = imm8 * _factor.GetElement((imm12 >> 8) & 3);
+ IsRotated = false;
+ }
+ else
+ {
+ int shift = imm12 >> 7;
+
+ Immediate = BitUtils.RotateRight(0x80 | (imm12 & 0x7f), shift, 32);
+ IsRotated = shift != 0;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluImm12.cs b/src/ARMeilleure/Decoders/OpCodeT32AluImm12.cs
new file mode 100644
index 0000000..12b65a1
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluImm12.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluImm12 : OpCodeT32Alu, IOpCode32AluImm
+ {
+ public int Immediate { get; }
+
+ public bool IsRotated => false;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluImm12(inst, address, opCode);
+
+ public OpCodeT32AluImm12(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
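+ // Plain 12-bit immediate assembled from i:imm3:imm8; no rotation is applied.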
+ Immediate = (opCode & 0xff) | ((opCode >> 4) & 0x700) | ((opCode >> 15) & 0x800);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluMla.cs b/src/ARMeilleure/Decoders/OpCodeT32AluMla.cs
new file mode 100644
index 0000000..6cb604d
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluMla.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluMla : OpCodeT32, IOpCode32AluMla
+ {
+ public int Rn { get; }
+ public int Rm { get; }
+ public int Ra { get; }
+ public int Rd { get; }
+
+ public bool NHigh { get; }
+ public bool MHigh { get; }
+ public bool R { get; }
+ public bool? SetFlags => false;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluMla(inst, address, opCode);
+
+ public OpCodeT32AluMla(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Rd = (opCode >> 8) & 0xf;
+ Ra = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+ R = (opCode & (1 << 4)) != 0;
+
+ MHigh = ((opCode >> 4) & 0x1) == 1;
+ NHigh = ((opCode >> 5) & 0x1) == 1;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluReg.cs b/src/ARMeilleure/Decoders/OpCodeT32AluReg.cs
new file mode 100644
index 0000000..4ac9834
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluReg.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluReg : OpCodeT32Alu, IOpCode32AluReg
+ {
+ public int Rm { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluReg(inst, address, opCode);
+
+ public OpCodeT32AluReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluRsImm.cs b/src/ARMeilleure/Decoders/OpCodeT32AluRsImm.cs
new file mode 100644
index 0000000..dad0d95
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluRsImm.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluRsImm : OpCodeT32Alu, IOpCode32AluRsImm
+ {
+ public int Rm { get; }
+ public int Immediate { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluRsImm(inst, address, opCode);
+
+ public OpCodeT32AluRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Immediate = ((opCode >> 6) & 3) | ((opCode >> 10) & 0x1c);
+
+ ShiftType = (ShiftType)((opCode >> 4) & 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluUmull.cs b/src/ARMeilleure/Decoders/OpCodeT32AluUmull.cs
new file mode 100644
index 0000000..a1b2e61
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluUmull.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluUmull : OpCodeT32, IOpCode32AluUmull
+ {
+ public int RdLo { get; }
+ public int RdHi { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+
+ public bool NHigh { get; }
+ public bool MHigh { get; }
+
+ public bool? SetFlags => false;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluUmull(inst, address, opCode);
+
+ public OpCodeT32AluUmull(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ RdHi = (opCode >> 8) & 0xf;
+ RdLo = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ MHigh = ((opCode >> 4) & 0x1) == 1;
+ NHigh = ((opCode >> 5) & 0x1) == 1;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluUx.cs b/src/ARMeilleure/Decoders/OpCodeT32AluUx.cs
new file mode 100644
index 0000000..861dc90
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32AluUx.cs
@@ -0,0 +1,18 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32AluUx : OpCodeT32AluReg, IOpCode32AluUx
+ {
+ public int Rotate { get; }
+ public int RotateBits => Rotate * 8;
+ public bool Add => Rn != RegisterAlias.Aarch32Pc;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluUx(inst, address, opCode);
+
+ public OpCodeT32AluUx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rotate = (opCode >> 4) & 0x3;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32BImm20.cs b/src/ARMeilleure/Decoders/OpCodeT32BImm20.cs
new file mode 100644
index 0000000..793f826
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32BImm20.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32BImm20 : OpCodeT32, IOpCode32BImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32BImm20(inst, address, opCode);
+
+ public OpCodeT32BImm20(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ uint pc = GetPc();
+
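+ // Conditional branch (T3 encoding): offset = SignExtend(S:J2:J1:imm6:imm11) << 1.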
+ int imm11 = (opCode >> 0) & 0x7ff;
+ int j2 = (opCode >> 11) & 1;
+ int j1 = (opCode >> 13) & 1;
+ int imm6 = (opCode >> 16) & 0x3f;
+ int s = (opCode >> 26) & 1;
+
+ int imm32 = imm11 | (imm6 << 11) | (j1 << 17) | (j2 << 18) | (s << 19);
+ imm32 = (imm32 << 13) >> 12;
+
+ Immediate = pc + imm32;
+
+ Cond = (Condition)((opCode >> 22) & 0xf);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32BImm24.cs b/src/ARMeilleure/Decoders/OpCodeT32BImm24.cs
new file mode 100644
index 0000000..d35ab8a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32BImm24.cs
@@ -0,0 +1,35 @@
+using ARMeilleure.Instructions;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32BImm24 : OpCodeT32, IOpCode32BImm
+ {
+ public long Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32BImm24(inst, address, opCode);
+
+ public OpCodeT32BImm24(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ uint pc = GetPc();
+
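+ // BLX transfers to ARM state, so the base PC is word-aligned first.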
+ if (inst.Name == InstName.Blx)
+ {
+ pc &= ~3u;
+ }
+
+ int imm11 = (opCode >> 0) & 0x7ff;
+ int j2 = (opCode >> 11) & 1;
+ int j1 = (opCode >> 13) & 1;
+ int imm10 = (opCode >> 16) & 0x3ff;
+ int s = (opCode >> 26) & 1;
+
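+ // I1 = NOT(J1 XOR S), I2 = NOT(J2 XOR S), as defined for the T32 branch encodings.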
+ int i1 = j1 ^ s ^ 1;
+ int i2 = j2 ^ s ^ 1;
+
+ int imm32 = imm11 | (imm10 << 11) | (i2 << 21) | (i1 << 22) | (s << 23);
+ imm32 = (imm32 << 8) >> 7;
+
+ Immediate = pc + imm32;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemImm12.cs b/src/ARMeilleure/Decoders/OpCodeT32MemImm12.cs
new file mode 100644
index 0000000..aac8dbf
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemImm12.cs
@@ -0,0 +1,25 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemImm12 : OpCodeT32, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rn { get; }
+ public bool WBack => false;
+ public bool IsLoad { get; }
+ public bool Index => true;
+ public bool Add => true;
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemImm12(inst, address, opCode);
+
+ public OpCodeT32MemImm12(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ Immediate = opCode & 0xfff;
+
+ IsLoad = ((opCode >> 20) & 1) != 0;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemImm8.cs b/src/ARMeilleure/Decoders/OpCodeT32MemImm8.cs
new file mode 100644
index 0000000..d80ce86
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemImm8.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemImm8 : OpCodeT32, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rn { get; }
+ public bool WBack { get; }
+ public bool IsLoad { get; }
+ public bool Index { get; }
+ public bool Add { get; }
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemImm8(inst, address, opCode);
+
+ public OpCodeT32MemImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ Index = ((opCode >> 10) & 1) != 0;
+ Add = ((opCode >> 9) & 1) != 0;
+ WBack = ((opCode >> 8) & 1) != 0;
+
+ Immediate = opCode & 0xff;
+
+ IsLoad = ((opCode >> 20) & 1) != 0;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemImm8D.cs b/src/ARMeilleure/Decoders/OpCodeT32MemImm8D.cs
new file mode 100644
index 0000000..51f5042
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemImm8D.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemImm8D : OpCodeT32, IOpCode32Mem
+ {
+ public int Rt { get; }
+ public int Rt2 { get; }
+ public int Rn { get; }
+ public bool WBack { get; }
+ public bool IsLoad { get; }
+ public bool Index { get; }
+ public bool Add { get; }
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemImm8D(inst, address, opCode);
+
+ public OpCodeT32MemImm8D(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt2 = (opCode >> 8) & 0xf;
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ Index = ((opCode >> 24) & 1) != 0;
+ Add = ((opCode >> 23) & 1) != 0;
+ WBack = ((opCode >> 21) & 1) != 0;
+
+ Immediate = (opCode & 0xff) << 2;
+
+ IsLoad = ((opCode >> 20) & 1) != 0;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemLdEx.cs b/src/ARMeilleure/Decoders/OpCodeT32MemLdEx.cs
new file mode 100644
index 0000000..c8eb36b
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemLdEx.cs
@@ -0,0 +1,26 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemLdEx : OpCodeT32, IOpCode32MemEx
+ {
+ public int Rd => 0;
+ public int Rt { get; }
+ public int Rt2 { get; }
+ public int Rn { get; }
+
+ public bool WBack => false;
+ public bool IsLoad => true;
+ public bool Index => false;
+ public bool Add => false;
+
+ public int Immediate => 0;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemLdEx(inst, address, opCode);
+
+ public OpCodeT32MemLdEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rt2 = (opCode >> 8) & 0xf;
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemMult.cs b/src/ARMeilleure/Decoders/OpCodeT32MemMult.cs
new file mode 100644
index 0000000..d155842
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemMult.cs
@@ -0,0 +1,52 @@
+using System.Numerics;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemMult : OpCodeT32, IOpCode32MemMult
+ {
+ public int Rn { get; }
+
+ public int RegisterMask { get; }
+ public int Offset { get; }
+ public int PostOffset { get; }
+
+ public bool IsLoad { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemMult(inst, address, opCode);
+
+ public OpCodeT32MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rn = (opCode >> 16) & 0xf;
+
+ bool isLoad = (opCode & (1 << 20)) != 0;
+ bool w = (opCode & (1 << 21)) != 0;
+ bool u = (opCode & (1 << 23)) != 0;
+ bool p = (opCode & (1 << 24)) != 0;
+
+ RegisterMask = opCode & 0xffff;
+
+ int regsSize = BitOperations.PopCount((uint)RegisterMask) * 4;
+
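+ // When U is clear the block sits below Rn; when U == P the first access shifts by one word; writeback (W) adjusts Rn by the full list size after the transfer.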
+ if (!u)
+ {
+ Offset -= regsSize;
+ }
+
+ if (u == p)
+ {
+ Offset += 4;
+ }
+
+ if (w)
+ {
+ PostOffset = u ? regsSize : -regsSize;
+ }
+ else
+ {
+ PostOffset = 0;
+ }
+
+ IsLoad = isLoad;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemRsImm.cs b/src/ARMeilleure/Decoders/OpCodeT32MemRsImm.cs
new file mode 100644
index 0000000..056d3b4
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemRsImm.cs
@@ -0,0 +1,30 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemRsImm : OpCodeT32, IOpCode32MemRsImm
+ {
+ public int Rt { get; }
+ public int Rn { get; }
+ public int Rm { get; }
+ public ShiftType ShiftType => ShiftType.Lsl;
+
+ public bool WBack => false;
+ public bool IsLoad { get; }
+ public bool Index => true;
+ public bool Add => true;
+
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemRsImm(inst, address, opCode);
+
+ public OpCodeT32MemRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+
+ IsLoad = (opCode & (1 << 20)) != 0;
+
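+ // imm2 (bits 5:4) is the LSL shift amount applied to Rm.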
+ Immediate = (opCode >> 4) & 3;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemStEx.cs b/src/ARMeilleure/Decoders/OpCodeT32MemStEx.cs
new file mode 100644
index 0000000..6a0a6bb
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MemStEx.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MemStEx : OpCodeT32, IOpCode32MemEx
+ {
+ public int Rd { get; }
+ public int Rt { get; }
+ public int Rt2 { get; }
+ public int Rn { get; }
+
+ public bool WBack => false;
+ public bool IsLoad => false;
+ public bool Index => false;
+ public bool Add => false;
+
+ public int Immediate => 0;
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemStEx(inst, address, opCode);
+
+ public OpCodeT32MemStEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rd = (opCode >> 0) & 0xf;
+ Rt2 = (opCode >> 8) & 0xf;
+ Rt = (opCode >> 12) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32MovImm16.cs b/src/ARMeilleure/Decoders/OpCodeT32MovImm16.cs
new file mode 100644
index 0000000..2f871c7
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32MovImm16.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32MovImm16 : OpCodeT32Alu, IOpCode32AluImm16
+ {
+ public int Immediate { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MovImm16(inst, address, opCode);
+
+ public OpCodeT32MovImm16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
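+ // 16-bit immediate assembled from imm4:i:imm3:imm8 (MOVW/MOVT encoding).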
+ Immediate = (opCode & 0xff) | ((opCode >> 4) & 0x700) | ((opCode >> 15) & 0x800) | ((opCode >> 4) & 0xf000);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32ShiftReg.cs b/src/ARMeilleure/Decoders/OpCodeT32ShiftReg.cs
new file mode 100644
index 0000000..3605597
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32ShiftReg.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32ShiftReg : OpCodeT32Alu, IOpCode32AluRsReg
+ {
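+ // In the register-shift encoding the shifted source occupies the Rn field, so Rm aliases Rn.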
+ public int Rm => Rn;
+ public int Rs { get; }
+
+ public ShiftType ShiftType { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32ShiftReg(inst, address, opCode);
+
+ public OpCodeT32ShiftReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rs = (opCode >> 0) & 0xf;
+
+ ShiftType = (ShiftType)((opCode >> 21) & 3);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeT32Tb.cs b/src/ARMeilleure/Decoders/OpCodeT32Tb.cs
new file mode 100644
index 0000000..0a4d2a6
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeT32Tb.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCodeT32Tb : OpCodeT32, IOpCode32BReg
+ {
+ public int Rm { get; }
+ public int Rn { get; }
+
+ public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32Tb(inst, address, opCode);
+
+ public OpCodeT32Tb(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ Rm = (opCode >> 0) & 0xf;
+ Rn = (opCode >> 16) & 0xf;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/OpCodeTable.cs b/src/ARMeilleure/Decoders/OpCodeTable.cs
new file mode 100644
index 0000000..20d567f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCodeTable.cs
@@ -0,0 +1,1528 @@
+using ARMeilleure.Instructions;
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+
+namespace ARMeilleure.Decoders
+{
+ static class OpCodeTable
+ {
+ public delegate OpCode MakeOp(InstDescriptor inst, ulong address, int opCode);
+
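+ // Instructions are bucketed by 12 opcode bits so decoding only scans a small candidate list.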
+ private const int FastLookupSize = 0x1000;
+
+ private readonly struct InstInfo
+ {
+ public int Mask { get; }
+ public int Value { get; }
+
+ public InstDescriptor Inst { get; }
+
+ public MakeOp MakeOp { get; }
+
+ public InstInfo(int mask, int value, InstDescriptor inst, MakeOp makeOp)
+ {
+ Mask = mask;
+ Value = value;
+ Inst = inst;
+ MakeOp = makeOp;
+ }
+ }
+
+ private static readonly List<InstInfo> _allInstA32 = new();
+ private static readonly List<InstInfo> _allInstT32 = new();
+ private static readonly List<InstInfo> _allInstA64 = new();
+
+ private static readonly InstInfo[][] _instA32FastLookup = new InstInfo[FastLookupSize][];
+ private static readonly InstInfo[][] _instT32FastLookup = new InstInfo[FastLookupSize][];
+ private static readonly InstInfo[][] _instA64FastLookup = new InstInfo[FastLookupSize][];
+
+ static OpCodeTable()
+ {
+#pragma warning disable IDE0055 // Disable formatting
+ #region "OpCode Table (AArch64)"
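+ // Pattern syntax: '0'/'1' are fixed bits, 'x' matches either value; a '<' run excludes the all-ones combination and a '>' run excludes all-zeros.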
+ // Base
+ SetA64("x0011010000xxxxx000000xxxxxxxxxx", InstName.Adc, InstEmit.Adc, OpCodeAluRs.Create);
+ SetA64("x0111010000xxxxx000000xxxxxxxxxx", InstName.Adcs, InstEmit.Adcs, OpCodeAluRs.Create);
+ SetA64("x00100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluImm.Create);
+ SetA64("00001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRs.Create);
+ SetA64("10001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRs.Create);
+ SetA64("x0001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRx.Create);
+ SetA64("x0001011001xxxxxxxx100xxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRx.Create);
+ SetA64("x01100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluImm.Create);
+ SetA64("00101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRs.Create);
+ SetA64("10101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRs.Create);
+ SetA64("x0101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRx.Create);
+ SetA64("x0101011001xxxxxxxx100xxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRx.Create);
+ SetA64("0xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adr, InstEmit.Adr, OpCodeAdr.Create);
+ SetA64("1xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adrp, InstEmit.Adrp, OpCodeAdr.Create);
+ SetA64("0001001000xxxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluImm.Create);
+ SetA64("100100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluImm.Create);
+ SetA64("00001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluRs.Create);
+ SetA64("10001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluRs.Create);
+ SetA64("0111001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluImm.Create);
+ SetA64("111100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluImm.Create);
+ SetA64("01101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluRs.Create);
+ SetA64("11101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluRs.Create);
+ SetA64("x0011010110xxxxx001010xxxxxxxxxx", InstName.Asrv, InstEmit.Asrv, OpCodeAluRs.Create);
+ SetA64("000101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit.B, OpCodeBImmAl.Create);
+ SetA64("01010100xxxxxxxxxxxxxxxxxxx0xxxx", InstName.B_Cond, InstEmit.B_Cond, OpCodeBImmCond.Create);
+ SetA64("00110011000xxxxx0xxxxxxxxxxxxxxx", InstName.Bfm, InstEmit.Bfm, OpCodeBfm.Create);
+ SetA64("1011001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Bfm, InstEmit.Bfm, OpCodeBfm.Create);
+ SetA64("00001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bic, InstEmit.Bic, OpCodeAluRs.Create);
+ SetA64("10001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bic, InstEmit.Bic, OpCodeAluRs.Create);
+ SetA64("01101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bics, InstEmit.Bics, OpCodeAluRs.Create);
+ SetA64("11101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bics, InstEmit.Bics, OpCodeAluRs.Create);
+ SetA64("100101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit.Bl, OpCodeBImmAl.Create);
+ SetA64("1101011000111111000000xxxxx00000", InstName.Blr, InstEmit.Blr, OpCodeBReg.Create);
+ SetA64("1101011000011111000000xxxxx00000", InstName.Br, InstEmit.Br, OpCodeBReg.Create);
+ SetA64("11010100001xxxxxxxxxxxxxxxx00000", InstName.Brk, InstEmit.Brk, OpCodeException.Create);
+ SetA64("x0110101xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbnz, InstEmit.Cbnz, OpCodeBImmCmp.Create);
+ SetA64("x0110100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbz, InstEmit.Cbz, OpCodeBImmCmp.Create);
+ SetA64("x0111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmn, InstEmit.Ccmn, OpCodeCcmpImm.Create);
+ SetA64("x0111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmn, InstEmit.Ccmn, OpCodeCcmpReg.Create);
+ SetA64("x1111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmp, InstEmit.Ccmp, OpCodeCcmpImm.Create);
+ SetA64("x1111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmp, InstEmit.Ccmp, OpCodeCcmpReg.Create);
+ SetA64("11010101000000110011xxxx01011111", InstName.Clrex, InstEmit.Clrex, OpCodeSystem.Create);
+ SetA64("x101101011000000000101xxxxxxxxxx", InstName.Cls, InstEmit.Cls, OpCodeAlu.Create);
+ SetA64("x101101011000000000100xxxxxxxxxx", InstName.Clz, InstEmit.Clz, OpCodeAlu.Create);
+ SetA64("00011010110xxxxx010000xxxxxxxxxx", InstName.Crc32b, InstEmit.Crc32b, OpCodeAluBinary.Create);
+ SetA64("00011010110xxxxx010001xxxxxxxxxx", InstName.Crc32h, InstEmit.Crc32h, OpCodeAluBinary.Create);
+ SetA64("00011010110xxxxx010010xxxxxxxxxx", InstName.Crc32w, InstEmit.Crc32w, OpCodeAluBinary.Create);
+ SetA64("10011010110xxxxx010011xxxxxxxxxx", InstName.Crc32x, InstEmit.Crc32x, OpCodeAluBinary.Create);
+ SetA64("00011010110xxxxx010100xxxxxxxxxx", InstName.Crc32cb, InstEmit.Crc32cb, OpCodeAluBinary.Create);
+ SetA64("00011010110xxxxx010101xxxxxxxxxx", InstName.Crc32ch, InstEmit.Crc32ch, OpCodeAluBinary.Create);
+ SetA64("00011010110xxxxx010110xxxxxxxxxx", InstName.Crc32cw, InstEmit.Crc32cw, OpCodeAluBinary.Create);
+ SetA64("10011010110xxxxx010111xxxxxxxxxx", InstName.Crc32cx, InstEmit.Crc32cx, OpCodeAluBinary.Create);
+ SetA64("11010101000000110010001010011111", InstName.Csdb, InstEmit.Csdb, OpCodeSystem.Create);
+ SetA64("x0011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csel, InstEmit.Csel, OpCodeCsel.Create);
+ SetA64("x0011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csinc, InstEmit.Csinc, OpCodeCsel.Create);
+ SetA64("x1011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csinv, InstEmit.Csinv, OpCodeCsel.Create);
+ SetA64("x1011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csneg, InstEmit.Csneg, OpCodeCsel.Create);
+ SetA64("11010101000000110011xxxx10111111", InstName.Dmb, InstEmit.Dmb, OpCodeSystem.Create);
+ SetA64("11010101000000110011xxxx10011111", InstName.Dsb, InstEmit.Dsb, OpCodeSystem.Create);
+ SetA64("01001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Eon, InstEmit.Eon, OpCodeAluRs.Create);
+ SetA64("11001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Eon, InstEmit.Eon, OpCodeAluRs.Create);
+ SetA64("0101001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluImm.Create);
+ SetA64("110100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluImm.Create);
+ SetA64("01001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluRs.Create);
+ SetA64("11001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluRs.Create);
+ SetA64("00010011100xxxxx0xxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, OpCodeAluRs.Create);
+ SetA64("10010011110xxxxxxxxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, OpCodeAluRs.Create);
+ SetA64("11010101000000110010000011011111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("11010101000000110010000011111111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("110101010000001100100001xxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("1101010100000011001000100xx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("1101010100000011001000101>>11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("110101010000001100100011xxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("11010101000000110010>>xxxxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
+ SetA64("11010101000000110011xxxx11011111", InstName.Isb, InstEmit.Isb, OpCodeSystem.Create);
+ SetA64("xx001000110xxxxx1xxxxxxxxxxxxxxx", InstName.Ldar, InstEmit.Ldar, OpCodeMemEx.Create);
+ SetA64("1x001000011xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxp, InstEmit.Ldaxp, OpCodeMemEx.Create);
+ SetA64("xx001000010xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxr, InstEmit.Ldaxr, OpCodeMemEx.Create);
+ SetA64("<<10100xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp, InstEmit.Ldp, OpCodeMemPair.Create);
+ SetA64("xx111000010xxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeMemImm.Create);
+ SetA64("xx11100101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeMemImm.Create);
+ SetA64("xx111000011xxxxxxxxx10xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeMemReg.Create);
+ SetA64("xx011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal, InstEmit.Ldr_Literal, OpCodeMemLit.Create);
+ SetA64("0x1110001x0xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create);
+ SetA64("0x1110011xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create);
+ SetA64("10111000100xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create);
+ SetA64("1011100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create);
+ SetA64("0x1110001x1xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemReg.Create);
+ SetA64("10111000101xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemReg.Create);
+ SetA64("xx001000010xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxr, InstEmit.Ldxr, OpCodeMemEx.Create);
+ SetA64("1x001000011xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxp, InstEmit.Ldxp, OpCodeMemEx.Create);
+ SetA64("x0011010110xxxxx001000xxxxxxxxxx", InstName.Lslv, InstEmit.Lslv, OpCodeAluRs.Create);
+ SetA64("x0011010110xxxxx001001xxxxxxxxxx", InstName.Lsrv, InstEmit.Lsrv, OpCodeAluRs.Create);
+ SetA64("x0011011000xxxxx0xxxxxxxxxxxxxxx", InstName.Madd, InstEmit.Madd, OpCodeMul.Create);
+ SetA64("0111001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movk, InstEmit.Movk, OpCodeMov.Create);
+ SetA64("111100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movk, InstEmit.Movk, OpCodeMov.Create);
+ SetA64("0001001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movn, InstEmit.Movn, OpCodeMov.Create);
+ SetA64("100100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movn, InstEmit.Movn, OpCodeMov.Create);
+ SetA64("0101001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movz, InstEmit.Movz, OpCodeMov.Create);
+ SetA64("110100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movz, InstEmit.Movz, OpCodeMov.Create);
+ SetA64("110101010011xxxxxxxxxxxxxxxxxxxx", InstName.Mrs, InstEmit.Mrs, OpCodeSystem.Create);
+ SetA64("110101010001xxxxxxxxxxxxxxxxxxxx", InstName.Msr, InstEmit.Msr, OpCodeSystem.Create);
+ SetA64("x0011011000xxxxx1xxxxxxxxxxxxxxx", InstName.Msub, InstEmit.Msub, OpCodeMul.Create);
+ SetA64("11010101000000110010000000011111", InstName.Nop, InstEmit.Nop, OpCodeSystem.Create);
+ SetA64("00101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Orn, InstEmit.Orn, OpCodeAluRs.Create);
+ SetA64("10101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Orn, InstEmit.Orn, OpCodeAluRs.Create);
+ SetA64("0011001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluImm.Create);
+ SetA64("101100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluImm.Create);
+ SetA64("00101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluRs.Create);
+ SetA64("10101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluRs.Create);
+ SetA64("1111100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemImm.Create); // immediate
+ SetA64("11111000100xxxxxxxxx00xxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemImm.Create); // prfum (unscaled offset)
+ SetA64("11011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemLit.Create); // literal
+ SetA64("11111000101xxxxxxxxx10xxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemReg.Create); // register
+ SetA64("x101101011000000000000xxxxxxxxxx", InstName.Rbit, InstEmit.Rbit, OpCodeAlu.Create);
+ SetA64("1101011001011111000000xxxxx00000", InstName.Ret, InstEmit.Ret, OpCodeBReg.Create);
+ SetA64("x101101011000000000001xxxxxxxxxx", InstName.Rev16, InstEmit.Rev16, OpCodeAlu.Create);
+ SetA64("x101101011000000000010xxxxxxxxxx", InstName.Rev32, InstEmit.Rev32, OpCodeAlu.Create);
+ SetA64("1101101011000000000011xxxxxxxxxx", InstName.Rev64, InstEmit.Rev64, OpCodeAlu.Create);
+ SetA64("x0011010110xxxxx001011xxxxxxxxxx", InstName.Rorv, InstEmit.Rorv, OpCodeAluRs.Create);
+ SetA64("x1011010000xxxxx000000xxxxxxxxxx", InstName.Sbc, InstEmit.Sbc, OpCodeAluRs.Create);
+ SetA64("x1111010000xxxxx000000xxxxxxxxxx", InstName.Sbcs, InstEmit.Sbcs, OpCodeAluRs.Create);
+ SetA64("00010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Sbfm, InstEmit.Sbfm, OpCodeBfm.Create);
+ SetA64("1001001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Sbfm, InstEmit.Sbfm, OpCodeBfm.Create);
+ SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv, InstEmit.Sdiv, OpCodeAluBinary.Create);
+ SetA64("11010101000000110010000010011111", InstName.Sev, InstEmit.Nop, OpCodeSystem.Create);
+ SetA64("11010101000000110010000010111111", InstName.Sevl, InstEmit.Nop, OpCodeSystem.Create);
+ SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl, InstEmit.Smaddl, OpCodeMul.Create);
+ SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl, InstEmit.Smsubl, OpCodeMul.Create);
+ SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smulh, InstEmit.Smulh, OpCodeMul.Create);
+ SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr, InstEmit.Stlr, OpCodeMemEx.Create);
+ SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp, InstEmit.Stlxp, OpCodeMemEx.Create);
+ SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr, InstEmit.Stlxr, OpCodeMemEx.Create);
+ SetA64("x010100xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, OpCodeMemPair.Create);
+ SetA64("xx111000000xxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeMemImm.Create);
+ SetA64("xx11100100xxxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeMemImm.Create);
+ SetA64("xx111000001xxxxxxxxx10xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeMemReg.Create);
+ SetA64("1x001000001xxxxx0xxxxxxxxxxxxxxx", InstName.Stxp, InstEmit.Stxp, OpCodeMemEx.Create);
+ SetA64("xx001000000xxxxx0xxxxxxxxxxxxxxx", InstName.Stxr, InstEmit.Stxr, OpCodeMemEx.Create);
+ SetA64("x10100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluImm.Create);
+ SetA64("01001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRs.Create);
+ SetA64("11001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRs.Create);
+ SetA64("x1001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRx.Create);
+ SetA64("x1001011001xxxxxxxx100xxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRx.Create);
+ SetA64("x11100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluImm.Create);
+ SetA64("01101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRs.Create);
+ SetA64("11101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRs.Create);
+ SetA64("x1101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRx.Create);
+ SetA64("x1101011001xxxxxxxx100xxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRx.Create);
+ SetA64("11010100000xxxxxxxxxxxxxxxx00001", InstName.Svc, InstEmit.Svc, OpCodeException.Create);
+ SetA64("1101010100001xxxxxxxxxxxxxxxxxxx", InstName.Sys, InstEmit.Sys, OpCodeSystem.Create);
+ SetA64("x0110111xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbnz, InstEmit.Tbnz, OpCodeBImmTest.Create);
+ SetA64("x0110110xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbz, InstEmit.Tbz, OpCodeBImmTest.Create);
+ SetA64("01010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Ubfm, InstEmit.Ubfm, OpCodeBfm.Create);
+ SetA64("1101001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ubfm, InstEmit.Ubfm, OpCodeBfm.Create);
+ SetA64("x0011010110xxxxx000010xxxxxxxxxx", InstName.Udiv, InstEmit.Udiv, OpCodeAluBinary.Create);
+ SetA64("10011011101xxxxx0xxxxxxxxxxxxxxx", InstName.Umaddl, InstEmit.Umaddl, OpCodeMul.Create);
+ SetA64("10011011101xxxxx1xxxxxxxxxxxxxxx", InstName.Umsubl, InstEmit.Umsubl, OpCodeMul.Create);
+ SetA64("10011011110xxxxx0xxxxxxxxxxxxxxx", InstName.Umulh, InstEmit.Umulh, OpCodeMul.Create);
+ SetA64("11010101000000110010000001011111", InstName.Wfe, InstEmit.Nop, OpCodeSystem.Create);
+ SetA64("11010101000000110010000001111111", InstName.Wfi, InstEmit.Nop, OpCodeSystem.Create);
+ SetA64("11010101000000110010000000111111", InstName.Yield, InstEmit.Nop, OpCodeSystem.Create);
+
+ // FP & SIMD
+ SetA64("0101111011100000101110xxxxxxxxxx", InstName.Abs_S, InstEmit.Abs_S, OpCodeSimd.Create);
+ SetA64("0>001110<<100000101110xxxxxxxxxx", InstName.Abs_V, InstEmit.Abs_V, OpCodeSimd.Create);
+ SetA64("01011110111xxxxx100001xxxxxxxxxx", InstName.Add_S, InstEmit.Add_S, OpCodeSimdReg.Create);
+ SetA64("0>001110<<1xxxxx100001xxxxxxxxxx", InstName.Add_V, InstEmit.Add_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx010000xxxxxxxxxx", InstName.Addhn_V, InstEmit.Addhn_V, OpCodeSimdReg.Create);
+ SetA64("0101111011110001101110xxxxxxxxxx", InstName.Addp_S, InstEmit.Addp_S, OpCodeSimd.Create);
+ SetA64("0>001110<<1xxxxx101111xxxxxxxxxx", InstName.Addp_V, InstEmit.Addp_V, OpCodeSimdReg.Create);
+ SetA64("000011100x110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, OpCodeSimd.Create);
+ SetA64("01001110<<110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, OpCodeSimd.Create);
+ SetA64("0100111000101000010110xxxxxxxxxx", InstName.Aesd_V, InstEmit.Aesd_V, OpCodeSimd.Create);
+ SetA64("0100111000101000010010xxxxxxxxxx", InstName.Aese_V, InstEmit.Aese_V, OpCodeSimd.Create);
+ SetA64("0100111000101000011110xxxxxxxxxx", InstName.Aesimc_V, InstEmit.Aesimc_V, OpCodeSimd.Create);
+ SetA64("0100111000101000011010xxxxxxxxxx", InstName.Aesmc_V, InstEmit.Aesmc_V, OpCodeSimd.Create);
+ SetA64("0x001110001xxxxx000111xxxxxxxxxx", InstName.And_V, InstEmit.And_V, OpCodeSimdReg.Create);
+ SetA64("0x001110011xxxxx000111xxxxxxxxxx", InstName.Bic_V, InstEmit.Bic_V, OpCodeSimdReg.Create);
+ SetA64("0x10111100000xxx0xx101xxxxxxxxxx", InstName.Bic_Vi, InstEmit.Bic_Vi, OpCodeSimdImm.Create);
+ SetA64("0x10111100000xxx10x101xxxxxxxxxx", InstName.Bic_Vi, InstEmit.Bic_Vi, OpCodeSimdImm.Create);
+ SetA64("0x101110111xxxxx000111xxxxxxxxxx", InstName.Bif_V, InstEmit.Bif_V, OpCodeSimdReg.Create);
+ SetA64("0x101110101xxxxx000111xxxxxxxxxx", InstName.Bit_V, InstEmit.Bit_V, OpCodeSimdReg.Create);
+ SetA64("0x101110011xxxxx000111xxxxxxxxxx", InstName.Bsl_V, InstEmit.Bsl_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<100000010010xxxxxxxxxx", InstName.Cls_V, InstEmit.Cls_V, OpCodeSimd.Create);
+ SetA64("0x101110<<100000010010xxxxxxxxxx", InstName.Clz_V, InstEmit.Clz_V, OpCodeSimd.Create);
+ SetA64("01111110111xxxxx100011xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, OpCodeSimdReg.Create);
+ SetA64("0101111011100000100110xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, OpCodeSimd.Create);
+ SetA64("0>101110<<1xxxxx100011xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<100000100110xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, OpCodeSimd.Create);
+ SetA64("01011110111xxxxx001111xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, OpCodeSimdReg.Create);
+ SetA64("0111111011100000100010xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, OpCodeSimd.Create);
+ SetA64("0>001110<<1xxxxx001111xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, OpCodeSimdReg.Create);
+ SetA64("0>101110<<100000100010xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, OpCodeSimd.Create);
+ SetA64("01011110111xxxxx001101xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, OpCodeSimdReg.Create);
+ SetA64("0101111011100000100010xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, OpCodeSimd.Create);
+ SetA64("0>001110<<1xxxxx001101xxxxxxxxxx", InstName.Cmgt_V, InstEmit.Cmgt_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<100000100010xxxxxxxxxx", InstName.Cmgt_V, InstEmit.Cmgt_V, OpCodeSimd.Create);
+ SetA64("01111110111xxxxx001101xxxxxxxxxx", InstName.Cmhi_S, InstEmit.Cmhi_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx001101xxxxxxxxxx", InstName.Cmhi_V, InstEmit.Cmhi_V, OpCodeSimdReg.Create);
+ SetA64("01111110111xxxxx001111xxxxxxxxxx", InstName.Cmhs_S, InstEmit.Cmhs_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx001111xxxxxxxxxx", InstName.Cmhs_V, InstEmit.Cmhs_V, OpCodeSimdReg.Create);
+ SetA64("0111111011100000100110xxxxxxxxxx", InstName.Cmle_S, InstEmit.Cmle_S, OpCodeSimd.Create);
+ SetA64("0>101110<<100000100110xxxxxxxxxx", InstName.Cmle_V, InstEmit.Cmle_V, OpCodeSimd.Create);
+ SetA64("0101111011100000101010xxxxxxxxxx", InstName.Cmlt_S, InstEmit.Cmlt_S, OpCodeSimd.Create);
+ SetA64("0>001110<<100000101010xxxxxxxxxx", InstName.Cmlt_V, InstEmit.Cmlt_V, OpCodeSimd.Create);
+ SetA64("01011110111xxxxx100011xxxxxxxxxx", InstName.Cmtst_S, InstEmit.Cmtst_S, OpCodeSimdReg.Create);
+ SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstName.Cmtst_V, InstEmit.Cmtst_V, OpCodeSimdReg.Create);
+ SetA64("0x00111000100000010110xxxxxxxxxx", InstName.Cnt_V, InstEmit.Cnt_V, OpCodeSimd.Create);
+ SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, InstEmit.Dup_Gp, OpCodeSimdIns.Create);
+ SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, InstEmit.Dup_S, OpCodeSimdIns.Create);
+ SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V, InstEmit.Dup_V, OpCodeSimdIns.Create);
+ SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstName.Eor_V, InstEmit.Eor_V, OpCodeSimdReg.Create);
+ SetA64("0>101110000xxxxx01011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, InstEmit.Fabd_V, OpCodeSimdReg.Create);
+ SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S, InstEmit.Fabs_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100000111110xxxxxxxxxx", InstName.Fabs_V, InstEmit.Fabs_V, OpCodeSimd.Create);
+ SetA64("011111100x1xxxxx111011xxxxxxxxxx", InstName.Facge_S, InstEmit.Facge_S, OpCodeSimdReg.Create);
+ SetA64("0>1011100<1xxxxx111011xxxxxxxxxx", InstName.Facge_V, InstEmit.Facge_V, OpCodeSimdReg.Create);
+ SetA64("011111101x1xxxxx111011xxxxxxxxxx", InstName.Facgt_S, InstEmit.Facgt_S, OpCodeSimdReg.Create);
+ SetA64("0>1011101<1xxxxx111011xxxxxxxxxx", InstName.Facgt_V, InstEmit.Facgt_V, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstName.Fadd_S, InstEmit.Fadd_S, OpCodeSimdReg.Create);
+ SetA64("0>0011100<1xxxxx110101xxxxxxxxxx", InstName.Fadd_V, InstEmit.Fadd_V, OpCodeSimdReg.Create);
+ SetA64("011111100x110000110110xxxxxxxxxx", InstName.Faddp_S, InstEmit.Faddp_S, OpCodeSimd.Create);
+ SetA64("0>1011100<1xxxxx110101xxxxxxxxxx", InstName.Faddp_V, InstEmit.Faddp_V, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxxxxxx01xxxxx0xxxx", InstName.Fccmp_S, InstEmit.Fccmp_S, OpCodeSimdFcond.Create);
+ SetA64("000111100x1xxxxxxxxx01xxxxx1xxxx", InstName.Fccmpe_S, InstEmit.Fccmpe_S, OpCodeSimdFcond.Create);
+ SetA64("010111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, OpCodeSimdReg.Create);
+ SetA64("010111101x100000110110xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, OpCodeSimd.Create);
+ SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, OpCodeSimdReg.Create);
+ SetA64("0>0011101<100000110110xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, OpCodeSimd.Create);
+ SetA64("011111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, OpCodeSimdReg.Create);
+ SetA64("011111101x100000110010xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, OpCodeSimd.Create);
+ SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, OpCodeSimdReg.Create);
+ SetA64("0>1011101<100000110010xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, OpCodeSimd.Create);
+ SetA64("011111101x1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, OpCodeSimdReg.Create);
+ SetA64("010111101x100000110010xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, OpCodeSimd.Create);
+ SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, OpCodeSimdReg.Create);
+ SetA64("0>0011101<100000110010xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, OpCodeSimd.Create);
+ SetA64("011111101x100000110110xxxxxxxxxx", InstName.Fcmle_S, InstEmit.Fcmle_S, OpCodeSimd.Create);
+ SetA64("0>1011101<100000110110xxxxxxxxxx", InstName.Fcmle_V, InstEmit.Fcmle_V, OpCodeSimd.Create);
+ SetA64("010111101x100000111010xxxxxxxxxx", InstName.Fcmlt_S, InstEmit.Fcmlt_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100000111010xxxxxxxxxx", InstName.Fcmlt_V, InstEmit.Fcmlt_V, OpCodeSimd.Create);
+ SetA64("000111100x1xxxxx001000xxxxx0x000", InstName.Fcmp_S, InstEmit.Fcmp_S, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx001000xxxxx1x000", InstName.Fcmpe_S, InstEmit.Fcmpe_S, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxxxxxx11xxxxxxxxxx", InstName.Fcsel_S, InstEmit.Fcsel_S, OpCodeSimdFcond.Create);
+ SetA64("00011110xx10001xx10000xxxxxxxxxx", InstName.Fcvt_S, InstEmit.Fcvt_S, OpCodeSimd.Create);
+ SetA64("x00111100x100100000000xxxxxxxxxx", InstName.Fcvtas_Gp, InstEmit.Fcvtas_Gp, OpCodeSimdCvt.Create);
+ SetA64("010111100x100001110010xxxxxxxxxx", InstName.Fcvtas_S, InstEmit.Fcvtas_S, OpCodeSimd.Create);
+ SetA64("0>0011100<100001110010xxxxxxxxxx", InstName.Fcvtas_V, InstEmit.Fcvtas_V, OpCodeSimd.Create);
+ SetA64("x00111100x100101000000xxxxxxxxxx", InstName.Fcvtau_Gp, InstEmit.Fcvtau_Gp, OpCodeSimdCvt.Create);
+ SetA64("011111100x100001110010xxxxxxxxxx", InstName.Fcvtau_S, InstEmit.Fcvtau_S, OpCodeSimd.Create);
+ SetA64("0>1011100<100001110010xxxxxxxxxx", InstName.Fcvtau_V, InstEmit.Fcvtau_V, OpCodeSimd.Create);
+ SetA64("0x0011100x100001011110xxxxxxxxxx", InstName.Fcvtl_V, InstEmit.Fcvtl_V, OpCodeSimd.Create);
+ SetA64("x00111100x110000000000xxxxxxxxxx", InstName.Fcvtms_Gp, InstEmit.Fcvtms_Gp, OpCodeSimdCvt.Create);
+ SetA64("0>0011100<100001101110xxxxxxxxxx", InstName.Fcvtms_V, InstEmit.Fcvtms_V, OpCodeSimd.Create);
+ SetA64("x00111100x110001000000xxxxxxxxxx", InstName.Fcvtmu_Gp, InstEmit.Fcvtmu_Gp, OpCodeSimdCvt.Create);
+ SetA64("0x0011100x100001011010xxxxxxxxxx", InstName.Fcvtn_V, InstEmit.Fcvtn_V, OpCodeSimd.Create);
+ SetA64("x00111100x100000000000xxxxxxxxxx", InstName.Fcvtns_Gp, InstEmit.Fcvtns_Gp, OpCodeSimdCvt.Create);
+ SetA64("010111100x100001101010xxxxxxxxxx", InstName.Fcvtns_S, InstEmit.Fcvtns_S, OpCodeSimd.Create);
+ SetA64("0>0011100<100001101010xxxxxxxxxx", InstName.Fcvtns_V, InstEmit.Fcvtns_V, OpCodeSimd.Create);
+ SetA64("011111100x100001101010xxxxxxxxxx", InstName.Fcvtnu_S, InstEmit.Fcvtnu_S, OpCodeSimd.Create);
+ SetA64("0>1011100<100001101010xxxxxxxxxx", InstName.Fcvtnu_V, InstEmit.Fcvtnu_V, OpCodeSimd.Create);
+ SetA64("x00111100x101000000000xxxxxxxxxx", InstName.Fcvtps_Gp, InstEmit.Fcvtps_Gp, OpCodeSimdCvt.Create);
+ SetA64("x00111100x101001000000xxxxxxxxxx", InstName.Fcvtpu_Gp, InstEmit.Fcvtpu_Gp, OpCodeSimdCvt.Create);
+ SetA64("x00111100x111000000000xxxxxxxxxx", InstName.Fcvtzs_Gp, InstEmit.Fcvtzs_Gp, OpCodeSimdCvt.Create);
+ SetA64(">00111100x011000>xxxxxxxxxxxxxxx", InstName.Fcvtzs_Gp_Fixed, InstEmit.Fcvtzs_Gp_Fixed, OpCodeSimdCvt.Create);
+ SetA64("010111101x100001101110xxxxxxxxxx", InstName.Fcvtzs_S, InstEmit.Fcvtzs_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100001101110xxxxxxxxxx", InstName.Fcvtzs_V, InstEmit.Fcvtzs_V, OpCodeSimd.Create);
+ SetA64("0x001111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed, InstEmit.Fcvtzs_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed, InstEmit.Fcvtzs_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("x00111100x111001000000xxxxxxxxxx", InstName.Fcvtzu_Gp, InstEmit.Fcvtzu_Gp, OpCodeSimdCvt.Create);
+ SetA64(">00111100x011001>xxxxxxxxxxxxxxx", InstName.Fcvtzu_Gp_Fixed, InstEmit.Fcvtzu_Gp_Fixed, OpCodeSimdCvt.Create);
+ SetA64("011111101x100001101110xxxxxxxxxx", InstName.Fcvtzu_S, InstEmit.Fcvtzu_S, OpCodeSimd.Create);
+ SetA64("0>1011101<100001101110xxxxxxxxxx", InstName.Fcvtzu_V, InstEmit.Fcvtzu_V, OpCodeSimd.Create);
+ SetA64("0x101111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, InstEmit.Fcvtzu_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, InstEmit.Fcvtzu_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("000111100x1xxxxx000110xxxxxxxxxx", InstName.Fdiv_S, InstEmit.Fdiv_S, OpCodeSimdReg.Create);
+ SetA64("0>1011100<1xxxxx111111xxxxxxxxxx", InstName.Fdiv_V, InstEmit.Fdiv_V, OpCodeSimdReg.Create);
+ SetA64("000111110x0xxxxx0xxxxxxxxxxxxxxx", InstName.Fmadd_S, InstEmit.Fmadd_S, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx010010xxxxxxxxxx", InstName.Fmax_S, InstEmit.Fmax_S, OpCodeSimdReg.Create);
+ SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, InstEmit.Fmax_V, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, InstEmit.Fmaxnm_S, OpCodeSimdReg.Create);
+ SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, InstEmit.Fmaxnm_V, OpCodeSimdReg.Create);
+ SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S, InstEmit.Fmaxnmp_S, OpCodeSimd.Create);
+ SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, OpCodeSimdReg.Create);
+ SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, OpCodeSimd.Create);
+ SetA64("011111100x110000111110xxxxxxxxxx", InstName.Fmaxp_S, InstEmit.Fmaxp_S, OpCodeSimd.Create);
+ SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, OpCodeSimdReg.Create);
+ SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, OpCodeSimd.Create);
+ SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, OpCodeSimdReg.Create);
+ SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, InstEmit.Fmin_V, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, InstEmit.Fminnm_S, OpCodeSimdReg.Create);
+ SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, InstEmit.Fminnm_V, OpCodeSimdReg.Create);
+ SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S, InstEmit.Fminnmp_S, OpCodeSimd.Create);
+ SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, OpCodeSimdReg.Create);
+ SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, OpCodeSimd.Create);
+ SetA64("011111101x110000111110xxxxxxxxxx", InstName.Fminp_S, InstEmit.Fminp_S, OpCodeSimd.Create);
+ SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, OpCodeSimdReg.Create);
+ SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, OpCodeSimd.Create);
+ SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, OpCodeSimdRegElemF.Create);
+ SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", InstName.Fmla_V, InstEmit.Fmla_V, OpCodeSimdReg.Create);
+ SetA64("0>00111110011101<1xxxxx110011xxxxxxxxxx", InstName.Fmls_V, InstEmit.Fmls_V, OpCodeSimdReg.Create);
+ SetA64("0>00111111011100<1xxxxx110111xxxxxxxxxx", InstName.Fmul_V, InstEmit.Fmul_V, OpCodeSimdReg.Create);
+ SetA64("0>00111110011100<1xxxxx110111xxxxxxxxxx", InstName.Fmulx_V, InstEmit.Fmulx_V, OpCodeSimdReg.Create);
+ SetA64("0>10111111011101<100000111110xxxxxxxxxx", InstName.Fneg_V, InstEmit.Fneg_V, OpCodeSimd.Create);
+ SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", InstName.Fnmadd_S, InstEmit.Fnmadd_S, OpCodeSimdReg.Create);
+ SetA64("000111110x1xxxxx1xxxxxxxxxxxxxxx", InstName.Fnmsub_S, InstEmit.Fnmsub_S, OpCodeSimdReg.Create);
+ SetA64("000111100x1xxxxx100010xxxxxxxxxx", InstName.Fnmul_S, InstEmit.Fnmul_S, OpCodeSimdReg.Create);
+ SetA64("010111101x100001110110xxxxxxxxxx", InstName.Frecpe_S, InstEmit.Frecpe_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100001110110xxxxxxxxxx", InstName.Frecpe_V, InstEmit.Frecpe_V, OpCodeSimd.Create);
+ SetA64("010111100x1xxxxx111111xxxxxxxxxx", InstName.Frecps_S, InstEmit.Frecps_S, OpCodeSimdReg.Create);
+ SetA64("0>0011100<1xxxxx111111xxxxxxxxxx", InstName.Frecps_V, InstEmit.Frecps_V, OpCodeSimdReg.Create);
+ SetA64("010111101x100001111110xxxxxxxxxx", InstName.Frecpx_S, InstEmit.Frecpx_S, OpCodeSimd.Create);
+ SetA64("000111100x100110010000xxxxxxxxxx", InstName.Frinta_S, InstEmit.Frinta_S, OpCodeSimd.Create);
+ SetA64("0>1011100<100001100010xxxxxxxxxx", InstName.Frinta_V, InstEmit.Frinta_V, OpCodeSimd.Create);
+ SetA64("000111100x100111110000xxxxxxxxxx", InstName.Frinti_S, InstEmit.Frinti_S, OpCodeSimd.Create);
+ SetA64("0>1011101<100001100110xxxxxxxxxx", InstName.Frinti_V, InstEmit.Frinti_V, OpCodeSimd.Create);
+ SetA64("000111100x100101010000xxxxxxxxxx", InstName.Frintm_S, InstEmit.Frintm_S, OpCodeSimd.Create);
+ SetA64("0>0011100<100001100110xxxxxxxxxx", InstName.Frintm_V, InstEmit.Frintm_V, OpCodeSimd.Create);
+ SetA64("000111100x100100010000xxxxxxxxxx", InstName.Frintn_S, InstEmit.Frintn_S, OpCodeSimd.Create);
+ SetA64("0>0011100<100001100010xxxxxxxxxx", InstName.Frintn_V, InstEmit.Frintn_V, OpCodeSimd.Create);
+ SetA64("000111100x100100110000xxxxxxxxxx", InstName.Frintp_S, InstEmit.Frintp_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100001100010xxxxxxxxxx", InstName.Frintp_V, InstEmit.Frintp_V, OpCodeSimd.Create);
+ SetA64("000111100x100111010000xxxxxxxxxx", InstName.Frintx_S, InstEmit.Frintx_S, OpCodeSimd.Create);
+ SetA64("0>1011100<100001100110xxxxxxxxxx", InstName.Frintx_V, InstEmit.Frintx_V, OpCodeSimd.Create);
+ SetA64("000111100x100101110000xxxxxxxxxx", InstName.Frintz_S, InstEmit.Frintz_S, OpCodeSimd.Create);
+ SetA64("0>0011101<100001100110xxxxxxxxxx", InstName.Frintz_V, InstEmit.Frintz_V, OpCodeSimd.Create);
+ SetA64("011111101x100001110110xxxxxxxxxx", InstName.Frsqrte_S, InstEmit.Frsqrte_S, OpCodeSimd.Create);
+ SetA64("0>1011101<100001110110xxxxxxxxxx", InstName.Frsqrte_V, InstEmit.Frsqrte_V, OpCodeSimd.Create);
+ SetA64("010111101x1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_S, InstEmit.Frsqrts_S, OpCodeSimdReg.Create);
+ SetA64("0>0011101<1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_V, InstEmit.Frsqrts_V, OpCodeSimdReg.Create);
+ SetA64("000111100x100001110000xxxxxxxxxx", InstName.Fsqrt_S, InstEmit.Fsqrt_S, OpCodeSimd.Create);
+ SetA64("0>1011101<100001111110xxxxxxxxxx", InstName.Fsqrt_V, InstEmit.Fsqrt_V, OpCodeSimd.Create);
+ SetA64("000111100x1xxxxx001110xxxxxxxxxx", InstName.Fsub_S, InstEmit.Fsub_S, OpCodeSimdReg.Create);
+ SetA64("0>0011101<1xxxxx110101xxxxxxxxxx", InstName.Fsub_V, InstEmit.Fsub_V, OpCodeSimdReg.Create);
+ SetA64("01001110000xxxxx000111xxxxxxxxxx", InstName.Ins_Gp, InstEmit.Ins_Gp, OpCodeSimdIns.Create);
+ SetA64("01101110000xxxxx0xxxx1xxxxxxxxxx", InstName.Ins_V, InstEmit.Ins_V, OpCodeSimdIns.Create);
+ SetA64("0x00110001000000xxxxxxxxxxxxxxxx", InstName.Ld__Vms, InstEmit.Ld__Vms, OpCodeSimdMemMs.Create);
+ SetA64("0x001100110xxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vms, InstEmit.Ld__Vms, OpCodeSimdMemMs.Create);
+ SetA64("0x00110101x00000xxxxxxxxxxxxxxxx", InstName.Ld__Vss, InstEmit.Ld__Vss, OpCodeSimdMemSs.Create);
+ SetA64("0x00110111xxxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vss, InstEmit.Ld__Vss, OpCodeSimdMemSs.Create);
+ SetA64("<<10110xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp, InstEmit.Ldp, OpCodeSimdMemPair.Create);
+ SetA64("xx111100x10xxxxxxxxx00xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x10xxxxxxxxx01xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x10xxxxxxxxx11xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create);
+ SetA64("xx111101x1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x11xxxxxx1xx10xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemReg.Create);
+ SetA64("xx011100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal, InstEmit.Ldr_Literal, OpCodeSimdMemLit.Create);
+ SetA64("0x001110<<1xxxxx100101xxxxxxxxxx", InstName.Mla_V, InstEmit.Mla_V, OpCodeSimdReg.Create);
+ SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", InstName.Mla_Ve, InstEmit.Mla_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", InstName.Mls_V, InstEmit.Mls_V, OpCodeSimdReg.Create);
+ SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", InstName.Mls_Ve, InstEmit.Mls_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x00111100000xxx0xx001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create);
+ SetA64("0x00111100000xxx10x001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create);
+ SetA64("0x00111100000xxx110x01xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create);
+ SetA64("0xx0111100000xxx111001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create);
+ SetA64("0x001110<<1xxxxx100111xxxxxxxxxx", InstName.Mul_V, InstEmit.Mul_V, OpCodeSimdReg.Create);
+ SetA64("0x001111xxxxxxxx1000x0xxxxxxxxxx", InstName.Mul_Ve, InstEmit.Mul_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x10111100000xxx0xx001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, OpCodeSimdImm.Create);
+ SetA64("0x10111100000xxx10x001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, OpCodeSimdImm.Create);
+ SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, OpCodeSimdImm.Create);
+ SetA64("0111111011100000101110xxxxxxxxxx", InstName.Neg_S, InstEmit.Neg_S, OpCodeSimd.Create);
+ SetA64("0>101110<<100000101110xxxxxxxxxx", InstName.Neg_V, InstEmit.Neg_V, OpCodeSimd.Create);
+ SetA64("0x10111000100000010110xxxxxxxxxx", InstName.Not_V, InstEmit.Not_V, OpCodeSimd.Create);
+ SetA64("0x001110111xxxxx000111xxxxxxxxxx", InstName.Orn_V, InstEmit.Orn_V, OpCodeSimdReg.Create);
+ SetA64("0x001110101xxxxx000111xxxxxxxxxx", InstName.Orr_V, InstEmit.Orr_V, OpCodeSimdReg.Create);
+ SetA64("0x00111100000xxx0xx101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, OpCodeSimdImm.Create);
+ SetA64("0x00111100000xxx10x101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, OpCodeSimdImm.Create);
+ SetA64("0x001110001xxxxx111000xxxxxxxxxx", InstName.Pmull_V, InstEmit.Pmull_V, OpCodeSimdReg.Create);
+ SetA64("0x001110111xxxxx111000xxxxxxxxxx", InstName.Pmull_V, InstEmit.Pmull_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx010000xxxxxxxxxx", InstName.Raddhn_V, InstEmit.Raddhn_V, OpCodeSimdReg.Create);
+ SetA64("0x10111001100000010110xxxxxxxxxx", InstName.Rbit_V, InstEmit.Rbit_V, OpCodeSimd.Create);
+ SetA64("0x00111000100000000110xxxxxxxxxx", InstName.Rev16_V, InstEmit.Rev16_V, OpCodeSimd.Create);
+ SetA64("0x1011100x100000000010xxxxxxxxxx", InstName.Rev32_V, InstEmit.Rev32_V, OpCodeSimd.Create);
+ SetA64("0x001110<<100000000010xxxxxxxxxx", InstName.Rev64_V, InstEmit.Rev64_V, OpCodeSimd.Create);
+ SetA64("0x00111100>>>xxx100011xxxxxxxxxx", InstName.Rshrn_V, InstEmit.Rshrn_V, OpCodeSimdShImm.Create);
+ SetA64("0x101110<<1xxxxx011000xxxxxxxxxx", InstName.Rsubhn_V, InstEmit.Rsubhn_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx011111xxxxxxxxxx", InstName.Saba_V, InstEmit.Saba_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx010100xxxxxxxxxx", InstName.Sabal_V, InstEmit.Sabal_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx011101xxxxxxxxxx", InstName.Sabd_V, InstEmit.Sabd_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx011100xxxxxxxxxx", InstName.Sabdl_V, InstEmit.Sabdl_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<100000011010xxxxxxxxxx", InstName.Sadalp_V, InstEmit.Sadalp_V, OpCodeSimd.Create);
+ SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstName.Saddl_V, InstEmit.Saddl_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<100000001010xxxxxxxxxx", InstName.Saddlp_V, InstEmit.Saddlp_V, OpCodeSimd.Create);
+ SetA64("000011100x110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, OpCodeSimd.Create);
+ SetA64("01001110<<110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, OpCodeSimd.Create);
+ SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstName.Saddw_V, InstEmit.Saddw_V, OpCodeSimdReg.Create);
+ SetA64("x00111100x100010000000xxxxxxxxxx", InstName.Scvtf_Gp, InstEmit.Scvtf_Gp, OpCodeSimdCvt.Create);
+ SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstName.Scvtf_Gp_Fixed, InstEmit.Scvtf_Gp_Fixed, OpCodeSimdCvt.Create);
+ SetA64("010111100x100001110110xxxxxxxxxx", InstName.Scvtf_S, InstEmit.Scvtf_S, OpCodeSimd.Create);
+ SetA64("010111110>>xxxxx111001xxxxxxxxxx", InstName.Scvtf_S_Fixed, InstEmit.Scvtf_S_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0>0011100<100001110110xxxxxxxxxx", InstName.Scvtf_V, InstEmit.Scvtf_V, OpCodeSimd.Create);
+ SetA64("0x001111001xxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("01011110000xxxxx000000xxxxxxxxxx", InstName.Sha1c_V, InstEmit.Sha1c_V, OpCodeSimdReg.Create);
+ SetA64("0101111000101000000010xxxxxxxxxx", InstName.Sha1h_V, InstEmit.Sha1h_V, OpCodeSimd.Create);
+ SetA64("01011110000xxxxx001000xxxxxxxxxx", InstName.Sha1m_V, InstEmit.Sha1m_V, OpCodeSimdReg.Create);
+ SetA64("01011110000xxxxx000100xxxxxxxxxx", InstName.Sha1p_V, InstEmit.Sha1p_V, OpCodeSimdReg.Create);
+ SetA64("01011110000xxxxx001100xxxxxxxxxx", InstName.Sha1su0_V, InstEmit.Sha1su0_V, OpCodeSimdReg.Create);
+ SetA64("0101111000101000000110xxxxxxxxxx", InstName.Sha1su1_V, InstEmit.Sha1su1_V, OpCodeSimd.Create);
+ SetA64("01011110000xxxxx010000xxxxxxxxxx", InstName.Sha256h_V, InstEmit.Sha256h_V, OpCodeSimdReg.Create);
+ SetA64("01011110000xxxxx010100xxxxxxxxxx", InstName.Sha256h2_V, InstEmit.Sha256h2_V, OpCodeSimdReg.Create);
+ SetA64("0101111000101000001010xxxxxxxxxx", InstName.Sha256su0_V, InstEmit.Sha256su0_V, OpCodeSimd.Create);
+ SetA64("01011110000xxxxx011000xxxxxxxxxx", InstName.Sha256su1_V, InstEmit.Sha256su1_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", InstName.Shadd_V, InstEmit.Shadd_V, OpCodeSimdReg.Create);
+ SetA64("0101111101xxxxxx010101xxxxxxxxxx", InstName.Shl_S, InstEmit.Shl_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, OpCodeSimdShImm.Create);
+ SetA64("0x101110<<100001001110xxxxxxxxxx", InstName.Shll_V, InstEmit.Shll_V, OpCodeSimd.Create);
+ SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstName.Shrn_V, InstEmit.Shrn_V, OpCodeSimdShImm.Create);
+ SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstName.Shsub_V, InstEmit.Shsub_V, OpCodeSimdReg.Create);
+ SetA64("0111111101xxxxxx010101xxxxxxxxxx", InstName.Sli_S, InstEmit.Sli_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, OpCodeSimdShImm.Create);
+ SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstName.Smax_V, InstEmit.Smax_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx101001xxxxxxxxxx", InstName.Smaxp_V, InstEmit.Smaxp_V, OpCodeSimdReg.Create);
+ SetA64("000011100x110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, OpCodeSimd.Create);
+ SetA64("01001110<<110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, OpCodeSimd.Create);
+ SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", InstName.Smin_V, InstEmit.Smin_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", InstName.Sminp_V, InstEmit.Sminp_V, OpCodeSimdReg.Create);
+ SetA64("000011100x110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, OpCodeSimd.Create);
+ SetA64("01001110<<110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, OpCodeSimd.Create);
+ SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", InstName.Smlal_V, InstEmit.Smlal_V, OpCodeSimdReg.Create);
+ SetA64("0x001111xxxxxxxx0010x0xxxxxxxxxx", InstName.Smlal_Ve, InstEmit.Smlal_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", InstName.Smlsl_V, InstEmit.Smlsl_V, OpCodeSimdReg.Create);
+ SetA64("0x001111xxxxxxxx0110x0xxxxxxxxxx", InstName.Smlsl_Ve, InstEmit.Smlsl_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstName.Smov_S, InstEmit.Smov_S, OpCodeSimdIns.Create);
+ SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", InstName.Smull_V, InstEmit.Smull_V, OpCodeSimdReg.Create);
+ SetA64("0x001111xxxxxxxx1010x0xxxxxxxxxx", InstName.Smull_Ve, InstEmit.Smull_Ve, OpCodeSimdRegElem.Create);
+ SetA64("01011110xx100000011110xxxxxxxxxx", InstName.Sqabs_S, InstEmit.Sqabs_S, OpCodeSimd.Create);
+ SetA64("0>001110<<100000011110xxxxxxxxxx", InstName.Sqabs_V, InstEmit.Sqabs_V, OpCodeSimd.Create);
+ SetA64("01011110xx1xxxxx000011xxxxxxxxxx", InstName.Sqadd_S, InstEmit.Sqadd_S, OpCodeSimdReg.Create);
+ SetA64("0>001110<<1xxxxx000011xxxxxxxxxx", InstName.Sqadd_V, InstEmit.Sqadd_V, OpCodeSimdReg.Create);
+ SetA64("01011110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, OpCodeSimdReg.Create);
+ SetA64("01011110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, OpCodeSimdReg.Create);
+ SetA64("0x001110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, OpCodeSimdReg.Create);
+ SetA64("0x001110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, OpCodeSimdReg.Create);
+ SetA64("0x00111101xxxxxx1100x0xxxxxxxxxx", InstName.Sqdmulh_Ve, InstEmit.Sqdmulh_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x00111110xxxxxx1100x0xxxxxxxxxx", InstName.Sqdmulh_Ve, InstEmit.Sqdmulh_Ve, OpCodeSimdRegElem.Create);
+ SetA64("01111110xx100000011110xxxxxxxxxx", InstName.Sqneg_S, InstEmit.Sqneg_S, OpCodeSimd.Create);
+ SetA64("0>101110<<100000011110xxxxxxxxxx", InstName.Sqneg_V, InstEmit.Sqneg_V, OpCodeSimd.Create);
+ SetA64("01111110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, OpCodeSimdReg.Create);
+ SetA64("01111110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, OpCodeSimdReg.Create);
+ SetA64("0x101110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, OpCodeSimdReg.Create);
+ SetA64("0x101110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, OpCodeSimdReg.Create);
+ SetA64("0x00111101xxxxxx1101x0xxxxxxxxxx", InstName.Sqrdmulh_Ve, InstEmit.Sqrdmulh_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x00111110xxxxxx1101x0xxxxxxxxxx", InstName.Sqrdmulh_Ve, InstEmit.Sqrdmulh_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0>001110<<1xxxxx010111xxxxxxxxxx", InstName.Sqrshl_V, InstEmit.Sqrshl_V, OpCodeSimdReg.Create);
+ SetA64("0101111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_S, InstEmit.Sqrshrn_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_V, InstEmit.Sqrshrn_V, OpCodeSimdShImm.Create);
+ SetA64("0111111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_S, InstEmit.Sqrshrun_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_V, InstEmit.Sqrshrun_V, OpCodeSimdShImm.Create);
+ SetA64("010111110>>>>xxx011101xxxxxxxxxx", InstName.Sqshl_Si, InstEmit.Sqshl_Si, OpCodeSimdShImm.Create);
+ SetA64("0>001110<<1xxxxx010011xxxxxxxxxx", InstName.Sqshl_V, InstEmit.Sqshl_V, OpCodeSimdReg.Create);
+ SetA64("0000111100>>>xxx011101xxxxxxxxxx", InstName.Sqshl_Vi, InstEmit.Sqshl_Vi, OpCodeSimdShImm.Create);
+ SetA64("010011110>>>>xxx011101xxxxxxxxxx", InstName.Sqshl_Vi, InstEmit.Sqshl_Vi, OpCodeSimdShImm.Create);
+ SetA64("0101111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_S, InstEmit.Sqshrn_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_V, InstEmit.Sqshrn_V, OpCodeSimdShImm.Create);
+ SetA64("0111111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_S, InstEmit.Sqshrun_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_V, InstEmit.Sqshrun_V, OpCodeSimdShImm.Create);
+ SetA64("01011110xx1xxxxx001011xxxxxxxxxx", InstName.Sqsub_S, InstEmit.Sqsub_S, OpCodeSimdReg.Create);
+ SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", InstName.Sqsub_V, InstEmit.Sqsub_V, OpCodeSimdReg.Create);
+ SetA64("01011110<<100001010010xxxxxxxxxx", InstName.Sqxtn_S, InstEmit.Sqxtn_S, OpCodeSimd.Create);
+ SetA64("0x001110<<100001010010xxxxxxxxxx", InstName.Sqxtn_V, InstEmit.Sqxtn_V, OpCodeSimd.Create);
+ SetA64("01111110<<100001001010xxxxxxxxxx", InstName.Sqxtun_S, InstEmit.Sqxtun_S, OpCodeSimd.Create);
+ SetA64("0x101110<<100001001010xxxxxxxxxx", InstName.Sqxtun_V, InstEmit.Sqxtun_V, OpCodeSimd.Create);
+ SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstName.Srhadd_V, InstEmit.Srhadd_V, OpCodeSimdReg.Create);
+ SetA64("0111111101xxxxxx010001xxxxxxxxxx", InstName.Sri_S, InstEmit.Sri_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx010001xxxxxxxxxx", InstName.Sri_V, InstEmit.Sri_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx010001xxxxxxxxxx", InstName.Sri_V, InstEmit.Sri_V, OpCodeSimdShImm.Create);
+ SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstName.Srshl_V, InstEmit.Srshl_V, OpCodeSimdReg.Create);
+ SetA64("0101111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_S, InstEmit.Srshr_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, OpCodeSimdShImm.Create);
+ SetA64("0101111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_S, InstEmit.Srsra_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, OpCodeSimdShImm.Create);
+ SetA64("01011110111xxxxx010001xxxxxxxxxx", InstName.Sshl_S, InstEmit.Sshl_S, OpCodeSimdReg.Create);
+ SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", InstName.Sshl_V, InstEmit.Sshl_V, OpCodeSimdReg.Create);
+ SetA64("0x00111100>>>xxx101001xxxxxxxxxx", InstName.Sshll_V, InstEmit.Sshll_V, OpCodeSimdShImm.Create);
+ SetA64("0101111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_S, InstEmit.Sshr_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, OpCodeSimdShImm.Create);
+ SetA64("0101111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_S, InstEmit.Ssra_S, OpCodeSimdShImm.Create);
+ SetA64("0x00111100>>>xxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, OpCodeSimdShImm.Create);
+ SetA64("0100111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, OpCodeSimdShImm.Create);
+ SetA64("0x001110<<1xxxxx001000xxxxxxxxxx", InstName.Ssubl_V, InstEmit.Ssubl_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", InstName.Ssubw_V, InstEmit.Ssubw_V, OpCodeSimdReg.Create);
+ SetA64("0x00110000000000xxxxxxxxxxxxxxxx", InstName.St__Vms, InstEmit.St__Vms, OpCodeSimdMemMs.Create);
+ SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", InstName.St__Vms, InstEmit.St__Vms, OpCodeSimdMemMs.Create);
+ SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", InstName.St__Vss, InstEmit.St__Vss, OpCodeSimdMemSs.Create);
+ SetA64("0x00110110xxxxxxxxxxxxxxxxxxxxxx", InstName.St__Vss, InstEmit.St__Vss, OpCodeSimdMemSs.Create);
+ SetA64("<<10110xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, OpCodeSimdMemPair.Create);
+ SetA64("xx111100x00xxxxxxxxx00xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x00xxxxxxxxx01xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x00xxxxxxxxx11xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create);
+ SetA64("xx111101x0xxxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create);
+ SetA64("xx111100x01xxxxxx1xx10xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemReg.Create);
+ SetA64("01111110111xxxxx100001xxxxxxxxxx", InstName.Sub_S, InstEmit.Sub_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx100001xxxxxxxxxx", InstName.Sub_V, InstEmit.Sub_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", InstName.Subhn_V, InstEmit.Subhn_V, OpCodeSimdReg.Create);
+ SetA64("01011110xx100000001110xxxxxxxxxx", InstName.Suqadd_S, InstEmit.Suqadd_S, OpCodeSimd.Create);
+ SetA64("0>001110<<100000001110xxxxxxxxxx", InstName.Suqadd_V, InstEmit.Suqadd_V, OpCodeSimd.Create);
+ SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", InstName.Tbl_V, InstEmit.Tbl_V, OpCodeSimdTbl.Create);
+ SetA64("0x001110000xxxxx0xx100xxxxxxxxxx", InstName.Tbx_V, InstEmit.Tbx_V, OpCodeSimdTbl.Create);
+ SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", InstName.Trn1_V, InstEmit.Trn1_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", InstName.Trn2_V, InstEmit.Trn2_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx011111xxxxxxxxxx", InstName.Uaba_V, InstEmit.Uaba_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx010100xxxxxxxxxx", InstName.Uabal_V, InstEmit.Uabal_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx011101xxxxxxxxxx", InstName.Uabd_V, InstEmit.Uabd_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx011100xxxxxxxxxx", InstName.Uabdl_V, InstEmit.Uabdl_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<100000011010xxxxxxxxxx", InstName.Uadalp_V, InstEmit.Uadalp_V, OpCodeSimd.Create);
+ SetA64("0x101110<<1xxxxx000000xxxxxxxxxx", InstName.Uaddl_V, InstEmit.Uaddl_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<100000001010xxxxxxxxxx", InstName.Uaddlp_V, InstEmit.Uaddlp_V, OpCodeSimd.Create);
+ SetA64("001011100x110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, OpCodeSimd.Create);
+ SetA64("01101110<<110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, OpCodeSimd.Create);
+ SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstName.Uaddw_V, InstEmit.Uaddw_V, OpCodeSimdReg.Create);
+ SetA64("x00111100x100011000000xxxxxxxxxx", InstName.Ucvtf_Gp, InstEmit.Ucvtf_Gp, OpCodeSimdCvt.Create);
+ SetA64(">00111100x000011>xxxxxxxxxxxxxxx", InstName.Ucvtf_Gp_Fixed, InstEmit.Ucvtf_Gp_Fixed, OpCodeSimdCvt.Create);
+ SetA64("011111100x100001110110xxxxxxxxxx", InstName.Ucvtf_S, InstEmit.Ucvtf_S, OpCodeSimd.Create);
+ SetA64("011111110>>xxxxx111001xxxxxxxxxx", InstName.Ucvtf_S_Fixed, InstEmit.Ucvtf_S_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0>1011100<100001110110xxxxxxxxxx", InstName.Ucvtf_V, InstEmit.Ucvtf_V, OpCodeSimd.Create);
+ SetA64("0x101111001xxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed, InstEmit.Ucvtf_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed, InstEmit.Ucvtf_V_Fixed, OpCodeSimdShImm.Create);
+ SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstName.Uhadd_V, InstEmit.Uhadd_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstName.Uhsub_V, InstEmit.Uhsub_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstName.Umax_V, InstEmit.Umax_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx101001xxxxxxxxxx", InstName.Umaxp_V, InstEmit.Umaxp_V, OpCodeSimdReg.Create);
+ SetA64("001011100x110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, OpCodeSimd.Create);
+ SetA64("01101110<<110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, OpCodeSimd.Create);
+ SetA64("0x101110<<1xxxxx011011xxxxxxxxxx", InstName.Umin_V, InstEmit.Umin_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx101011xxxxxxxxxx", InstName.Uminp_V, InstEmit.Uminp_V, OpCodeSimdReg.Create);
+ SetA64("001011100x110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, OpCodeSimd.Create);
+ SetA64("01101110<<110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, OpCodeSimd.Create);
+ SetA64("0x101110<<1xxxxx100000xxxxxxxxxx", InstName.Umlal_V, InstEmit.Umlal_V, OpCodeSimdReg.Create);
+ SetA64("0x101111xxxxxxxx0010x0xxxxxxxxxx", InstName.Umlal_Ve, InstEmit.Umlal_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x101110<<1xxxxx101000xxxxxxxxxx", InstName.Umlsl_V, InstEmit.Umlsl_V, OpCodeSimdReg.Create);
+ SetA64("0x101111xxxxxxxx0110x0xxxxxxxxxx", InstName.Umlsl_Ve, InstEmit.Umlsl_Ve, OpCodeSimdRegElem.Create);
+ SetA64("0x001110000xxxxx001111xxxxxxxxxx", InstName.Umov_S, InstEmit.Umov_S, OpCodeSimdIns.Create);
+ SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", InstName.Umull_V, InstEmit.Umull_V, OpCodeSimdReg.Create);
+ SetA64("0x101111xxxxxxxx1010x0xxxxxxxxxx", InstName.Umull_Ve, InstEmit.Umull_Ve, OpCodeSimdRegElem.Create);
+ SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstName.Uqadd_S, InstEmit.Uqadd_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", InstName.Uqadd_V, InstEmit.Uqadd_V, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx010111xxxxxxxxxx", InstName.Uqrshl_V, InstEmit.Uqrshl_V, OpCodeSimdReg.Create);
+ SetA64("0111111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_S, InstEmit.Uqrshrn_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_V, InstEmit.Uqrshrn_V, OpCodeSimdShImm.Create);
+ SetA64("0>101110<<1xxxxx010011xxxxxxxxxx", InstName.Uqshl_V, InstEmit.Uqshl_V, OpCodeSimdReg.Create);
+ SetA64("0111111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_S, InstEmit.Uqshrn_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_V, InstEmit.Uqshrn_V, OpCodeSimdShImm.Create);
+ SetA64("01111110xx1xxxxx001011xxxxxxxxxx", InstName.Uqsub_S, InstEmit.Uqsub_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", InstName.Uqsub_V, InstEmit.Uqsub_V, OpCodeSimdReg.Create);
+ SetA64("01111110<<100001010010xxxxxxxxxx", InstName.Uqxtn_S, InstEmit.Uqxtn_S, OpCodeSimd.Create);
+ SetA64("0x101110<<100001010010xxxxxxxxxx", InstName.Uqxtn_V, InstEmit.Uqxtn_V, OpCodeSimd.Create);
+ SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", InstName.Urhadd_V, InstEmit.Urhadd_V, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx010101xxxxxxxxxx", InstName.Urshl_V, InstEmit.Urshl_V, OpCodeSimdReg.Create);
+ SetA64("0111111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_S, InstEmit.Urshr_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, OpCodeSimdShImm.Create);
+ SetA64("0111111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_S, InstEmit.Ursra_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, OpCodeSimdShImm.Create);
+ SetA64("01111110111xxxxx010001xxxxxxxxxx", InstName.Ushl_S, InstEmit.Ushl_S, OpCodeSimdReg.Create);
+ SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", InstName.Ushl_V, InstEmit.Ushl_V, OpCodeSimdReg.Create);
+ SetA64("0x10111100>>>xxx101001xxxxxxxxxx", InstName.Ushll_V, InstEmit.Ushll_V, OpCodeSimdShImm.Create);
+ SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S, InstEmit.Ushr_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, OpCodeSimdShImm.Create);
+ SetA64("01111110xx100000001110xxxxxxxxxx", InstName.Usqadd_S, InstEmit.Usqadd_S, OpCodeSimd.Create);
+ SetA64("0>101110<<100000001110xxxxxxxxxx", InstName.Usqadd_V, InstEmit.Usqadd_V, OpCodeSimd.Create);
+ SetA64("0111111101xxxxxx000101xxxxxxxxxx", InstName.Usra_S, InstEmit.Usra_S, OpCodeSimdShImm.Create);
+ SetA64("0x10111100>>>xxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, OpCodeSimdShImm.Create);
+ SetA64("0110111101xxxxxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, OpCodeSimdShImm.Create);
+ SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", InstName.Usubl_V, InstEmit.Usubl_V, OpCodeSimdReg.Create);
+ SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", InstName.Usubw_V, InstEmit.Usubw_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V, InstEmit.Uzp1_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", InstName.Uzp2_V, InstEmit.Uzp2_V, OpCodeSimdReg.Create);
+ SetA64("0x001110<<100001001010xxxxxxxxxx", InstName.Xtn_V, InstEmit.Xtn_V, OpCodeSimd.Create);
+ SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", InstName.Zip1_V, InstEmit.Zip1_V, OpCodeSimdReg.Create);
+ SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", InstName.Zip2_V, InstEmit.Zip2_V, OpCodeSimdReg.Create);
+ #endregion
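+
+ // Note on the encoding strings used above. This is a hedged reading, assuming the
+ // SetA64/SetA32 helpers follow the usual convention for this table: '0' and '1'
+ // are fixed opcode bits, 'x' is a don't-care bit, and '<'/'>' are don't-care bits
+ // with one blacklisted combination per pattern: the case where every '<' bit is 1
+ // and every '>' bit is 0 never matches. For example, in "0>001110<<1..." the Q bit
+ // ('>') and the size field ('<<') may take any value except Q = 0 with size = 11,
+ // which has no valid encoding.
+ //
+ // A minimal sketch of how such a pattern reduces to a mask/value pair for matching
+ // (illustrative only; Compile below is hypothetical, not this decoder's API):
+ //
+ //     static (uint Mask, uint Value) Compile(string encoding)
+ //     {
+ //         uint mask = 0, value = 0;
+ //         for (int i = 0; i < encoding.Length; i++)
+ //         {
+ //             int bit = 31 - i; // Leftmost character is the most significant bit.
+ //             if (encoding[i] == '0' || encoding[i] == '1')
+ //             {
+ //                 mask |= 1u << bit; // Bit participates in the match...
+ //                 if (encoding[i] == '1')
+ //                 {
+ //                     value |= 1u << bit; // ...and must be set.
+ //                 }
+ //             }
+ //             // 'x', '<' and '>' stay out of the mask; '<'/'>' positions are
+ //             // additionally checked against the single blacklisted combination.
+ //         }
+ //         return (mask, value);
+ //     }
+ //
+ // An opcode then matches when (opcode & Mask) == Value and its '<'/'>' bits do
+ // not hit the blacklisted combination.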
+
+ #region "OpCode Table (AArch32, A32)"
+ // Base
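+ // In the A32 patterns below, the leading "<<<<" is the 4-bit condition field: it
+ // matches any condition code except 1111, which belongs to the unconditional
+ // instruction space and is handled by the patterns that spell out 1111 explicitly
+ // (e.g. the Blx immediate and the barrier encodings).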
+ SetA32("<<<<0010101xxxxxxxxxxxxxxxxxxxxx", InstName.Adc, InstEmit32.Adc, OpCode32AluImm.Create);
+ SetA32("<<<<0000101xxxxxxxxxxxxxxxx0xxxx", InstName.Adc, InstEmit32.Adc, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000101xxxxxxxxxxxxx0xx1xxxx", InstName.Adc, InstEmit32.Adc, OpCode32AluRsReg.Create);
+ SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit32.Add, OpCode32AluImm.Create);
+ SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstName.Add, InstEmit32.Add, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000100xxxxxxxxxxxxx0xx1xxxx", InstName.Add, InstEmit32.Add, OpCode32AluRsReg.Create);
+ SetA32("<<<<0010000xxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit32.And, OpCode32AluImm.Create);
+ SetA32("<<<<0000000xxxxxxxxxxxxxxxx0xxxx", InstName.And, InstEmit32.And, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000000xxxxxxxxxxxxx0xx1xxxx", InstName.And, InstEmit32.And, OpCode32AluRsReg.Create);
+ SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit32.B, OpCode32BImm.Create);
+ SetA32("<<<<0111110xxxxxxxxxxxxxx0011111", InstName.Bfc, InstEmit32.Bfc, OpCode32AluBf.Create);
+ SetA32("<<<<0111110xxxxxxxxxxxxxx001xxxx", InstName.Bfi, InstEmit32.Bfi, OpCode32AluBf.Create);
+ SetA32("<<<<0011110xxxxxxxxxxxxxxxxxxxxx", InstName.Bic, InstEmit32.Bic, OpCode32AluImm.Create);
+ SetA32("<<<<0001110xxxxxxxxxxxxxxxx0xxxx", InstName.Bic, InstEmit32.Bic, OpCode32AluRsImm.Create);
+ SetA32("<<<<0001110xxxxxxxxxxxxx0xx1xxxx", InstName.Bic, InstEmit32.Bic, OpCode32AluRsReg.Create);
+ SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit32.Bl, OpCode32BImm.Create);
+ SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Blx, InstEmit32.Blx, OpCode32BImm.Create);
+ SetA32("<<<<000100101111111111110011xxxx", InstName.Blx, InstEmit32.Blxr, OpCode32BReg.Create);
+ SetA32("<<<<000100101111111111110001xxxx", InstName.Bx, InstEmit32.Bx, OpCode32BReg.Create);
+ SetA32("11110101011111111111000000011111", InstName.Clrex, InstEmit32.Clrex, OpCode32.Create);
+ SetA32("<<<<000101101111xxxx11110001xxxx", InstName.Clz, InstEmit32.Clz, OpCode32AluReg.Create);
+ SetA32("<<<<00110111xxxx0000xxxxxxxxxxxx", InstName.Cmn, InstEmit32.Cmn, OpCode32AluImm.Create);
+ SetA32("<<<<00010111xxxx0000xxxxxxx0xxxx", InstName.Cmn, InstEmit32.Cmn, OpCode32AluRsImm.Create);
+ SetA32("<<<<00010111xxxx0000xxxx0xx1xxxx", InstName.Cmn, InstEmit32.Cmn, OpCode32AluRsReg.Create);
+ SetA32("<<<<00110101xxxx0000xxxxxxxxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCode32AluImm.Create);
+ SetA32("<<<<00010101xxxx0000xxxxxxx0xxxx", InstName.Cmp, InstEmit32.Cmp, OpCode32AluRsImm.Create);
+ SetA32("<<<<00010101xxxx0000xxxx0xx1xxxx", InstName.Cmp, InstEmit32.Cmp, OpCode32AluRsReg.Create);
+ SetA32("<<<<00010000xxxxxxxx00000100xxxx", InstName.Crc32b, InstEmit32.Crc32b, OpCode32AluReg.Create);
+ SetA32("<<<<00010000xxxxxxxx00100100xxxx", InstName.Crc32cb, InstEmit32.Crc32cb, OpCode32AluReg.Create);
+ SetA32("<<<<00010010xxxxxxxx00100100xxxx", InstName.Crc32ch, InstEmit32.Crc32ch, OpCode32AluReg.Create);
+ SetA32("<<<<00010100xxxxxxxx00100100xxxx", InstName.Crc32cw, InstEmit32.Crc32cw, OpCode32AluReg.Create);
+ SetA32("<<<<00010010xxxxxxxx00000100xxxx", InstName.Crc32h, InstEmit32.Crc32h, OpCode32AluReg.Create);
+ SetA32("<<<<00010100xxxxxxxx00000100xxxx", InstName.Crc32w, InstEmit32.Crc32w, OpCode32AluReg.Create);
+ SetA32("<<<<0011001000001111000000010100", InstName.Csdb, InstEmit32.Csdb, OpCode32.Create);
+ SetA32("1111010101111111111100000101xxxx", InstName.Dmb, InstEmit32.Dmb, OpCode32.Create);
+ SetA32("1111010101111111111100000100xxxx", InstName.Dsb, InstEmit32.Dsb, OpCode32.Create);
+ SetA32("<<<<0010001xxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit32.Eor, OpCode32AluImm.Create);
+ SetA32("<<<<0000001xxxxxxxxxxxxxxxx0xxxx", InstName.Eor, InstEmit32.Eor, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000001xxxxxxxxxxxxx0xx1xxxx", InstName.Eor, InstEmit32.Eor, OpCode32AluRsReg.Create);
+ SetA32("<<<<0011001000001111000000010000", InstName.Esb, InstEmit32.Nop, OpCode32.Create); // Error Synchronization Barrier (FEAT_RAS)
+ SetA32("<<<<001100100000111100000000011x", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000000001xxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000000010001", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000000010011", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000000010101", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<001100100000111100000001011x", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000000011xxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<00110010000011110000001xxxxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<0011001000001111000001xxxxxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("<<<<001100100000111100001xxxxxxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint
+ SetA32("1111010101111111111100000110xxxx", InstName.Isb, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<00011001xxxxxxxx110010011111", InstName.Lda, InstEmit32.Lda, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011101xxxxxxxx110010011111", InstName.Ldab, InstEmit32.Ldab, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011001xxxxxxxx111010011111", InstName.Ldaex, InstEmit32.Ldaex, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011101xxxxxxxx111010011111", InstName.Ldaexb, InstEmit32.Ldaexb, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011011xxxxxxxx111010011111", InstName.Ldaexd, InstEmit32.Ldaexd, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011111xxxxxxxx111010011111", InstName.Ldaexh, InstEmit32.Ldaexh, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011111xxxxxxxx110010011111", InstName.Ldah, InstEmit32.Ldah, OpCode32MemLdEx.Create);
+ SetA32("<<<<100xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, OpCode32MemMult.Create);
+ SetA32("<<<<010xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCode32MemImm.Create);
+ SetA32("<<<<011xx0x1xxxxxxxxxxxxxxx0xxxx", InstName.Ldr, InstEmit32.Ldr, OpCode32MemRsImm.Create);
+ SetA32("<<<<010xx1x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCode32MemImm.Create);
+ SetA32("<<<<011xx1x1xxxxxxxxxxxxxxx0xxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCode32MemRsImm.Create);
+ SetA32("<<<<000xx1x0xxxxxxxxxxxx1101xxxx", InstName.Ldrd, InstEmit32.Ldrd, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x0xxxxxxxx00001101xxxx", InstName.Ldrd, InstEmit32.Ldrd, OpCode32MemReg.Create);
+ SetA32("<<<<00011001xxxxxxxx111110011111", InstName.Ldrex, InstEmit32.Ldrex, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011101xxxxxxxx111110011111", InstName.Ldrexb, InstEmit32.Ldrexb, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011011xxxxxxxx111110011111", InstName.Ldrexd, InstEmit32.Ldrexd, OpCode32MemLdEx.Create);
+ SetA32("<<<<00011111xxxxxxxx111110011111", InstName.Ldrexh, InstEmit32.Ldrexh, OpCode32MemLdEx.Create);
+ SetA32("<<<<000xx1x1xxxxxxxxxxxx1011xxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x1xxxxxxxx00001011xxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCode32MemReg.Create);
+ SetA32("<<<<000xx1x1xxxxxxxxxxxx1101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x1xxxxxxxx00001101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCode32MemReg.Create);
+ SetA32("<<<<000xx1x1xxxxxxxxxxxx1111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x1xxxxxxxx00001111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCode32MemReg.Create);
+ SetA32("<<<<1110xxx0xxxxxxxx111xxxx1xxxx", InstName.Mcr, InstEmit32.Mcr, OpCode32System.Create);
+ SetA32("<<<<0000001xxxxxxxxxxxxx1001xxxx", InstName.Mla, InstEmit32.Mla, OpCode32AluMla.Create);
+ SetA32("<<<<00000110xxxxxxxxxxxx1001xxxx", InstName.Mls, InstEmit32.Mls, OpCode32AluMla.Create);
+ SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluImm.Create);
+ SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluRsImm.Create);
+ SetA32("<<<<0001101x0000xxxxxxxx0xx1xxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluRsReg.Create);
+ SetA32("<<<<00110000xxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluImm16.Create);
+ SetA32("<<<<00110100xxxxxxxxxxxxxxxxxxxx", InstName.Movt, InstEmit32.Movt, OpCode32AluImm16.Create);
+ SetA32("<<<<1110xxx1xxxxxxxx111xxxx1xxxx", InstName.Mrc, InstEmit32.Mrc, OpCode32System.Create);
+ SetA32("<<<<11000101xxxxxxxx111xxxxxxxxx", InstName.Mrrc, InstEmit32.Mrrc, OpCode32System.Create);
+ SetA32("<<<<00010x001111xxxx000000000000", InstName.Mrs, InstEmit32.Mrs, OpCode32Mrs.Create);
+ SetA32("<<<<00010x10xxxx111100000000xxxx", InstName.Msr, InstEmit32.Msr, OpCode32MsrReg.Create);
+ SetA32("<<<<0000000xxxxx0000xxxx1001xxxx", InstName.Mul, InstEmit32.Mul, OpCode32AluMla.Create);
+ SetA32("<<<<0011111x0000xxxxxxxxxxxxxxxx", InstName.Mvn, InstEmit32.Mvn, OpCode32AluImm.Create);
+ SetA32("<<<<0001111x0000xxxxxxxxxxx0xxxx", InstName.Mvn, InstEmit32.Mvn, OpCode32AluRsImm.Create);
+ SetA32("<<<<0001111x0000xxxxxxxx0xx1xxxx", InstName.Mvn, InstEmit32.Mvn, OpCode32AluRsReg.Create);
+ SetA32("<<<<0011001000001111000000000000", InstName.Nop, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<0011100xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit32.Orr, OpCode32AluImm.Create);
+ SetA32("<<<<0001100xxxxxxxxxxxxxxxx0xxxx", InstName.Orr, InstEmit32.Orr, OpCode32AluRsImm.Create);
+ SetA32("<<<<0001100xxxxxxxxxxxxx0xx1xxxx", InstName.Orr, InstEmit32.Orr, OpCode32AluRsReg.Create);
+ SetA32("<<<<01101000xxxxxxxxxxxxxx01xxxx", InstName.Pkh, InstEmit32.Pkh, OpCode32AluRsImm.Create);
+ SetA32("11110101xx01xxxx1111xxxxxxxxxxxx", InstName.Pld, InstEmit32.Nop, OpCode32.Create);
+ SetA32("11110111xx01xxxx1111xxxxxxx0xxxx", InstName.Pld, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<01100010xxxxxxxx11110001xxxx", InstName.Qadd16, InstEmit32.Qadd16, OpCode32AluReg.Create);
+ SetA32("<<<<011011111111xxxx11110011xxxx", InstName.Rbit, InstEmit32.Rbit, OpCode32AluReg.Create);
+ SetA32("<<<<011010111111xxxx11110011xxxx", InstName.Rev, InstEmit32.Rev, OpCode32AluReg.Create);
+ SetA32("<<<<011010111111xxxx11111011xxxx", InstName.Rev16, InstEmit32.Rev16, OpCode32AluReg.Create);
+ SetA32("<<<<011011111111xxxx11111011xxxx", InstName.Revsh, InstEmit32.Revsh, OpCode32AluReg.Create);
+ SetA32("<<<<0010011xxxxxxxxxxxxxxxxxxxxx", InstName.Rsb, InstEmit32.Rsb, OpCode32AluImm.Create);
+ SetA32("<<<<0000011xxxxxxxxxxxxxxxx0xxxx", InstName.Rsb, InstEmit32.Rsb, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000011xxxxxxxxxxxxx0xx1xxxx", InstName.Rsb, InstEmit32.Rsb, OpCode32AluRsReg.Create);
+ SetA32("<<<<0010111xxxxxxxxxxxxxxxxxxxxx", InstName.Rsc, InstEmit32.Rsc, OpCode32AluImm.Create);
+ SetA32("<<<<0000111xxxxxxxxxxxxxxxx0xxxx", InstName.Rsc, InstEmit32.Rsc, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000111xxxxxxxxxxxxx0xx1xxxx", InstName.Rsc, InstEmit32.Rsc, OpCode32AluRsReg.Create);
+ SetA32("<<<<01100001xxxxxxxx11111001xxxx", InstName.Sadd8, InstEmit32.Sadd8, OpCode32AluReg.Create);
+ SetA32("<<<<0010110xxxxxxxxxxxxxxxxxxxxx", InstName.Sbc, InstEmit32.Sbc, OpCode32AluImm.Create);
+ SetA32("<<<<0000110xxxxxxxxxxxxxxxx0xxxx", InstName.Sbc, InstEmit32.Sbc, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000110xxxxxxxxxxxxx0xx1xxxx", InstName.Sbc, InstEmit32.Sbc, OpCode32AluRsReg.Create);
+ SetA32("<<<<0111101xxxxxxxxxxxxxx101xxxx", InstName.Sbfx, InstEmit32.Sbfx, OpCode32AluBf.Create);
+ SetA32("<<<<01110001xxxx1111xxxx0001xxxx", InstName.Sdiv, InstEmit32.Sdiv, OpCode32AluMla.Create);
+ SetA32("<<<<01101000xxxxxxxx11111011xxxx", InstName.Sel, InstEmit32.Sel, OpCode32AluReg.Create);
+ SetA32("<<<<0011001000001111000000000100", InstName.Sev, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<0011001000001111000000000101", InstName.Sevl, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<01100011xxxxxxxx11111001xxxx", InstName.Shadd8, InstEmit32.Shadd8, OpCode32AluReg.Create);
+ SetA32("<<<<01100011xxxxxxxx11111111xxxx", InstName.Shsub8, InstEmit32.Shsub8, OpCode32AluReg.Create);
+ SetA32("<<<<00010000xxxxxxxxxxxx1xx0xxxx", InstName.Smla__, InstEmit32.Smla__, OpCode32AluMla.Create);
+ SetA32("<<<<0000111xxxxxxxxxxxxx1001xxxx", InstName.Smlal, InstEmit32.Smlal, OpCode32AluUmull.Create);
+ SetA32("<<<<00010100xxxxxxxxxxxx1xx0xxxx", InstName.Smlal__, InstEmit32.Smlal__, OpCode32AluUmull.Create);
+ SetA32("<<<<00010010xxxxxxxxxxxx1x00xxxx", InstName.Smlaw_, InstEmit32.Smlaw_, OpCode32AluMla.Create);
+ SetA32("<<<<01110101xxxxxxxxxxxx00x1xxxx", InstName.Smmla, InstEmit32.Smmla, OpCode32AluMla.Create);
+ SetA32("<<<<01110101xxxxxxxxxxxx11x1xxxx", InstName.Smmls, InstEmit32.Smmls, OpCode32AluMla.Create);
+ SetA32("<<<<00010110xxxxxxxxxxxx1xx0xxxx", InstName.Smul__, InstEmit32.Smul__, OpCode32AluMla.Create);
+ SetA32("<<<<0000110xxxxxxxxxxxxx1001xxxx", InstName.Smull, InstEmit32.Smull, OpCode32AluUmull.Create);
+ SetA32("<<<<00010010xxxx0000xxxx1x10xxxx", InstName.Smulw_, InstEmit32.Smulw_, OpCode32AluMla.Create);
+ SetA32("<<<<0110101xxxxxxxxxxxxxxx01xxxx", InstName.Ssat, InstEmit32.Ssat, OpCode32Sat.Create);
+ SetA32("<<<<01101010xxxxxxxx11110011xxxx", InstName.Ssat16, InstEmit32.Ssat16, OpCode32Sat16.Create);
+ SetA32("<<<<01100001xxxxxxxx11111111xxxx", InstName.Ssub8, InstEmit32.Ssub8, OpCode32AluReg.Create);
+ SetA32("<<<<00011000xxxx111111001001xxxx", InstName.Stl, InstEmit32.Stl, OpCode32MemStEx.Create);
+ SetA32("<<<<00011100xxxx111111001001xxxx", InstName.Stlb, InstEmit32.Stlb, OpCode32MemStEx.Create);
+ SetA32("<<<<00011000xxxxxxxx11101001xxxx", InstName.Stlex, InstEmit32.Stlex, OpCode32MemStEx.Create);
+ SetA32("<<<<00011100xxxxxxxx11101001xxxx", InstName.Stlexb, InstEmit32.Stlexb, OpCode32MemStEx.Create);
+ SetA32("<<<<00011010xxxxxxxx11101001xxxx", InstName.Stlexd, InstEmit32.Stlexd, OpCode32MemStEx.Create);
+ SetA32("<<<<00011110xxxxxxxx11101001xxxx", InstName.Stlexh, InstEmit32.Stlexh, OpCode32MemStEx.Create);
+ SetA32("<<<<00011110xxxx111111001001xxxx", InstName.Stlh, InstEmit32.Stlh, OpCode32MemStEx.Create);
+ SetA32("<<<<100xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, OpCode32MemMult.Create);
+ SetA32("<<<<010xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit32.Str, OpCode32MemImm.Create);
+ SetA32("<<<<011xx0x0xxxxxxxxxxxxxxx0xxxx", InstName.Str, InstEmit32.Str, OpCode32MemRsImm.Create);
+ SetA32("<<<<010xx1x0xxxxxxxxxxxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCode32MemImm.Create);
+ SetA32("<<<<011xx1x0xxxxxxxxxxxxxxx0xxxx", InstName.Strb, InstEmit32.Strb, OpCode32MemRsImm.Create);
+ SetA32("<<<<000xx1x0xxxxxxxxxxxx1111xxxx", InstName.Strd, InstEmit32.Strd, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x0xxxxxxxx00001111xxxx", InstName.Strd, InstEmit32.Strd, OpCode32MemReg.Create);
+ SetA32("<<<<00011000xxxxxxxx11111001xxxx", InstName.Strex, InstEmit32.Strex, OpCode32MemStEx.Create);
+ SetA32("<<<<00011100xxxxxxxx11111001xxxx", InstName.Strexb, InstEmit32.Strexb, OpCode32MemStEx.Create);
+ SetA32("<<<<00011010xxxxxxxx11111001xxxx", InstName.Strexd, InstEmit32.Strexd, OpCode32MemStEx.Create);
+ SetA32("<<<<00011110xxxxxxxx11111001xxxx", InstName.Strexh, InstEmit32.Strexh, OpCode32MemStEx.Create);
+ SetA32("<<<<000xx1x0xxxxxxxxxxxx1011xxxx", InstName.Strh, InstEmit32.Strh, OpCode32MemImm8.Create);
+ SetA32("<<<<000xx0x0xxxxxxxx00001011xxxx", InstName.Strh, InstEmit32.Strh, OpCode32MemReg.Create);
+ SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCode32AluImm.Create);
+ SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstName.Sub, InstEmit32.Sub, OpCode32AluRsImm.Create);
+ SetA32("<<<<0000010xxxxxxxxxxxxx0xx1xxxx", InstName.Sub, InstEmit32.Sub, OpCode32AluRsReg.Create);
+ SetA32("<<<<1111xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Svc, InstEmit32.Svc, OpCode32Exception.Create);
+ SetA32("<<<<01101010xxxxxxxxxx000111xxxx", InstName.Sxtb, InstEmit32.Sxtb, OpCode32AluUx.Create);
+ SetA32("<<<<01101000xxxxxxxxxx000111xxxx", InstName.Sxtb16, InstEmit32.Sxtb16, OpCode32AluUx.Create);
+ SetA32("<<<<01101011xxxxxxxxxx000111xxxx", InstName.Sxth, InstEmit32.Sxth, OpCode32AluUx.Create);
+ SetA32("<<<<00110011xxxx0000xxxxxxxxxxxx", InstName.Teq, InstEmit32.Teq, OpCode32AluImm.Create);
+ SetA32("<<<<00010011xxxx0000xxxxxxx0xxxx", InstName.Teq, InstEmit32.Teq, OpCode32AluRsImm.Create);
+ SetA32("<<<<00010011xxxx0000xxxx0xx1xxxx", InstName.Teq, InstEmit32.Teq, OpCode32AluRsReg.Create);
+ SetA32("<<<<0111111111111101111011111110", InstName.Trap, InstEmit32.Trap, OpCode32Exception.Create);
+ SetA32("<<<<0011001000001111000000010010", InstName.Tsb, InstEmit32.Nop, OpCode32.Create); // Trace Synchronization Barrier (FEAT_TRF)
+ SetA32("<<<<00110001xxxx0000xxxxxxxxxxxx", InstName.Tst, InstEmit32.Tst, OpCode32AluImm.Create);
+ SetA32("<<<<00010001xxxx0000xxxxxxx0xxxx", InstName.Tst, InstEmit32.Tst, OpCode32AluRsImm.Create);
+ SetA32("<<<<00010001xxxx0000xxxx0xx1xxxx", InstName.Tst, InstEmit32.Tst, OpCode32AluRsReg.Create);
+ SetA32("<<<<01100101xxxxxxxx11111001xxxx", InstName.Uadd8, InstEmit32.Uadd8, OpCode32AluReg.Create);
+ SetA32("<<<<0111111xxxxxxxxxxxxxx101xxxx", InstName.Ubfx, InstEmit32.Ubfx, OpCode32AluBf.Create);
+ SetA32("<<<<01110011xxxx1111xxxx0001xxxx", InstName.Udiv, InstEmit32.Udiv, OpCode32AluMla.Create);
+ SetA32("<<<<01100111xxxxxxxx11111001xxxx", InstName.Uhadd8, InstEmit32.Uhadd8, OpCode32AluReg.Create);
+ SetA32("<<<<01100111xxxxxxxx11111111xxxx", InstName.Uhsub8, InstEmit32.Uhsub8, OpCode32AluReg.Create);
+ SetA32("<<<<00000100xxxxxxxxxxxx1001xxxx", InstName.Umaal, InstEmit32.Umaal, OpCode32AluUmull.Create);
+ SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal, InstEmit32.Umlal, OpCode32AluUmull.Create);
+ SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull, InstEmit32.Umull, OpCode32AluUmull.Create);
+ SetA32("<<<<01100110xxxxxxxx11110001xxxx", InstName.Uqadd16, InstEmit32.Uqadd16, OpCode32AluReg.Create);
+ SetA32("<<<<01100110xxxxxxxx11111001xxxx", InstName.Uqadd8, InstEmit32.Uqadd8, OpCode32AluReg.Create);
+ SetA32("<<<<01100110xxxxxxxx11110111xxxx", InstName.Uqsub16, InstEmit32.Uqsub16, OpCode32AluReg.Create);
+ SetA32("<<<<01100110xxxxxxxx11111111xxxx", InstName.Uqsub8, InstEmit32.Uqsub8, OpCode32AluReg.Create);
+ SetA32("<<<<0110111xxxxxxxxxxxxxxx01xxxx", InstName.Usat, InstEmit32.Usat, OpCode32Sat.Create);
+ SetA32("<<<<01101110xxxxxxxx11110011xxxx", InstName.Usat16, InstEmit32.Usat16, OpCode32Sat16.Create);
+ SetA32("<<<<01100101xxxxxxxx11111111xxxx", InstName.Usub8, InstEmit32.Usub8, OpCode32AluReg.Create);
+ SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb, InstEmit32.Uxtb, OpCode32AluUx.Create);
+ SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16, InstEmit32.Uxtb16, OpCode32AluUx.Create);
+ SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth, InstEmit32.Uxth, OpCode32AluUx.Create);
+ SetA32("<<<<0011001000001111000000000010", InstName.Wfe, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<0011001000001111000000000011", InstName.Wfi, InstEmit32.Nop, OpCode32.Create);
+ SetA32("<<<<0011001000001111000000000001", InstName.Yield, InstEmit32.Nop, OpCode32.Create);
+
+ // VFP
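+ // Each VFP entry registers two opcode factories: the first (Create) decodes the
+ // A32 (ARM) encoding, and the CreateT32 variant decodes the equivalent T32
+ // (Thumb-2) encoding of the same instruction.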
+ SetVfp("<<<<11101x110000xxxx101x11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101x11010xxxxx101x01x0xxxx", InstName.Vcmp, InstEmit32.Vcmp, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101x11010xxxxx101x11x0xxxx", InstName.Vcmpe, InstEmit32.Vcmpe, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101x110111xxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FD, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); // FP 32 and 64, scalar.
+ SetVfp("<<<<11101x11110xxxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // FP32 to int.
+ SetVfp("<<<<11101x111000xxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // Int to FP32.
+ SetVfp("111111101x1111xxxxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_RM, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // The many FP32 to int encodings (fp).
+ SetVfp("<<<<11101x11001xxxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_TB, OpCode32SimdCvtTB.Create, OpCode32SimdCvtTB.CreateT32);
+ SetVfp("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, OpCode32SimdDupGP.Create, OpCode32SimdDupGP.CreateT32);
+ SetVfp("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11001x01xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11010x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x01xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11010x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<1101xx01xxxxxxxx101xxxxxxxxx", InstName.Vldr, InstEmit32.Vldr, OpCode32SimdMemImm.Create, OpCode32SimdMemImm.CreateT32);
+ SetVfp("111111101x00xxxxxxxx10>>x0x0xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("111111101x00xxxxxxxx10>>x1x0xxxx", InstName.Vminnm, InstEmit32.Vminnm_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11100x00xxxxxxxx101xx0x0xxxx", InstName.Vmla, InstEmit32.Vmla_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11100x00xxxxxxxx101xx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11100xx0xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, OpCode32SimdMovGpElem.Create, OpCode32SimdMovGpElem.CreateT32); // From gen purpose.
+ SetVfp("<<<<1110xxx1xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, OpCode32SimdMovGpElem.Create, OpCode32SimdMovGpElem.CreateT32); // To gen purpose.
+ SetVfp("<<<<1100010xxxxxxxxx101000x1xxxx", InstName.Vmov, InstEmit32.Vmov_G2, OpCode32SimdMovGpDouble.Create, OpCode32SimdMovGpDouble.CreateT32); // To/from gen purpose x2 and single precision x2.
+ SetVfp("<<<<1100010xxxxxxxxx101100x1xxxx", InstName.Vmov, InstEmit32.Vmov_GD, OpCode32SimdMovGpDouble.Create, OpCode32SimdMovGpDouble.CreateT32); // To/from gen purpose x2 and double precision.
+ SetVfp("<<<<1110000xxxxxxxxx1010x0010000", InstName.Vmov, InstEmit32.Vmov_GS, OpCode32SimdMovGp.Create, OpCode32SimdMovGp.CreateT32); // To/from gen purpose and single precision.
+ SetVfp("<<<<11101x11xxxxxxxx101x0000xxxx", InstName.Vmov, InstEmit32.Vmov_I, OpCode32SimdImm44.Create, OpCode32SimdImm44.CreateT32); // Scalar f16/32/64 based on size 01 10 11.
+ SetVfp("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov, InstEmit32.Vmov_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, OpCode32SimdSpecial.Create, OpCode32SimdSpecial.CreateT32);
+ SetVfp("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, OpCode32SimdSpecial.Create, OpCode32SimdSpecial.CreateT32);
+ SetVfp("<<<<11100x10xxxxxxxx101xx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11101x110001xxxx101x01x0xxxx", InstName.Vneg, InstEmit32.Vneg_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11100x01xxxxxxxx101xx1x0xxxx", InstName.Vnmla, InstEmit32.Vnmla_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls, InstEmit32.Vnmls_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+ SetVfp("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101x110110xxxx101x01x0xxxx", InstName.Vrintr, InstEmit32.Vrintr_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101x110111xxxx101x01x0xxxx", InstName.Vrintx, InstEmit32.Vrintx_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
+ SetVfp("111111100xxxxxxxxxxx101xx0x0xxxx", InstName.Vsel, InstEmit32.Vsel, OpCode32SimdSel.Create, OpCode32SimdSel.CreateT32);
+ SetVfp("<<<<11001x00xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11010x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x00xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11001x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<11010x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32);
+ SetVfp("<<<<1101xx00xxxxxxxx101xxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, OpCode32SimdMemImm.Create, OpCode32SimdMemImm.CreateT32);
+ SetVfp("<<<<11100x11xxxxxxxx101xx1x0xxxx", InstName.Vsub, InstEmit32.Vsub_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
+
+ // ASIMD
+ SetAsimd("111100111x110000xxx0001101x0xxx0", InstName.Aesd_V, InstEmit32.Aesd_V, OpCode32Simd.Create, OpCode32Simd.CreateT32);
+ SetAsimd("111100111x110000xxx0001100x0xxx0", InstName.Aese_V, InstEmit32.Aese_V, OpCode32Simd.Create, OpCode32Simd.CreateT32);
+ SetAsimd("111100111x110000xxx0001111x0xxx0", InstName.Aesimc_V, InstEmit32.Aesimc_V, OpCode32Simd.Create, OpCode32Simd.CreateT32);
+ SetAsimd("111100111x110000xxx0001110x0xxx0", InstName.Aesmc_V, InstEmit32.Aesmc_V, OpCode32Simd.Create, OpCode32Simd.CreateT32);
+ SetAsimd("111100110x00xxx0xxx01100x1x0xxx0", InstName.Sha256h_V, InstEmit32.Sha256h_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100110x01xxx0xxx01100x1x0xxx0", InstName.Sha256h2_V, InstEmit32.Sha256h2_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x111010xxx0001111x0xxx0", InstName.Sha256su0_V, InstEmit32.Sha256su0_V, OpCode32Simd.Create, OpCode32Simd.CreateT32);
+ SetAsimd("111100110x10xxx0xxx01100x1x0xxx0", InstName.Sha256su1_V, InstEmit32.Sha256su1_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x0x<xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101000x10xxxxxxxx000x<>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("1111001x1x>>>xxxxxxx100100x1xxx0", InstName.Vqshrn, InstEmit32.Vqshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("111100111x>>>xxxxxxx100000x1xxx0", InstName.Vqshrun, InstEmit32.Vqshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("1111001x0xxxxxxxxxxx0010xxx1xxxx", InstName.Vqsub, InstEmit32.Vqsub, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, OpCode32SimdSqrte.Create, OpCode32SimdSqrte.CreateT32);
+ SetAsimd("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("111100111x11xx00xxxx000<>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
+ SetAsimd("111100101x>>>xxxxxxx100001x1xxx0", InstName.Vrshrn, InstEmit32.Vrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, OpCode32SimdSqrte.Create, OpCode32SimdSqrte.CreateT32);
+ SetAsimd("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x1x>>>xxxxxxx0011>xx1xxxx", InstName.Vrsra, InstEmit32.Vrsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
+ SetAsimd("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
+ SetAsimd("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
+ SetAsimd("1111001x1x>>>xxxxxxx101000x1xxxx", InstName.Vshll, InstEmit32.Vshll, OpCode32SimdShImmLong.Create, OpCode32SimdShImmLong.CreateT32); // A1 encoding.
+ SetAsimd("111100111x11<<10xxxx001100x0xxxx", InstName.Vshll, InstEmit32.Vshll2, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32); // A2 encoding.
+ SetAsimd("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
+ SetAsimd("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
+ SetAsimd("111100111x>>>xxxxxxx0101>xx1xxxx", InstName.Vsli, InstEmit32.Vsli_I, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
+ SetAsimd("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra, InstEmit32.Vsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
+ SetAsimd("111101001x00xxxxxxxx0000xxx0xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x00xxxxxxxx0100xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x00xxxxxxxx1000x000xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101001x00xxxxxxxx1000x011xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32);
+ SetAsimd("111101000x00xxxxxxxx0111xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1.
+ SetAsimd("111101000x00xxxxxxxx1010xx<>>>", InstName.It, InstEmit32.It, OpCodeT16IfThen.Create);
+ SetT16("11000xxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, OpCodeT16MemMult.Create);
+ SetT16("11001xxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, OpCodeT16MemMult.Create);
+ SetT16("1101<< allInsts, Func toFastLookupIndex)
+ {
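+ // Pre-bucket the instructions: an entry is added to every bucket whose index is compatible with its mask/value pair, so decoding only has to scan a short candidate list.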
+ List<InstInfo>[] temp = new List<InstInfo>[FastLookupSize];
+
+ for (int index = 0; index < temp.Length; index++)
+ {
+ temp[index] = new List<InstInfo>();
+ }
+
+ foreach (InstInfo inst in allInsts)
+ {
+ int mask = toFastLookupIndex(inst.Mask);
+ int value = toFastLookupIndex(inst.Value);
+
+ for (int index = 0; index < temp.Length; index++)
+ {
+ if ((index & mask) == value)
+ {
+ temp[index].Add(inst);
+ }
+ }
+ }
+
+ for (int index = 0; index < temp.Length; index++)
+ {
+ table[index] = temp[index].ToArray();
+ }
+ }
+
+ private static void SetA32(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp)
+ {
+ Set(encoding, _allInstA32, new InstDescriptor(name, emitter), makeOp);
+ }
+
+ private static void SetT16(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp)
+ {
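+ // T16 encodings are only 16 bits wide but are looked up through the shared T32 table, so pad the upper halfword with don't-care bits.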
+ encoding = "xxxxxxxxxxxxxxxx" + encoding;
+ Set(encoding, _allInstT32, new InstDescriptor(name, emitter), makeOp);
+ }
+
+ private static void SetT32(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp)
+ {
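+ // T32 encoding strings list the first halfword first, but the decoder assembles the two halfwords the other way around; swap the string's halves for the table match and rotate the raw opcode back before handing it to the OpCode constructor.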
+ string reversedEncoding = $"{encoding.AsSpan(16)}{encoding.AsSpan(0, 16)}";
+ OpCode ReversedMakeOp(InstDescriptor inst, ulong address, int opCode)
+ => makeOp(inst, address, (int)BitOperations.RotateRight((uint)opCode, 16));
+ Set(reversedEncoding, _allInstT32, new InstDescriptor(name, emitter), ReversedMakeOp);
+ }
+
+ private static void SetVfp(string encoding, InstName name, InstEmitter emitter, MakeOp makeOpA32, MakeOp makeOpT32)
+ {
+ SetA32(encoding, name, emitter, makeOpA32);
+
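+ // VFP instructions are unconditional in Thumb: the A32 condition nibble becomes a fixed 1110 prefix.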
+ string thumbEncoding = encoding;
+ if (thumbEncoding.StartsWith("<<<<"))
+ {
+ thumbEncoding = $"1110{thumbEncoding.AsSpan(4)}";
+ }
+ SetT32(thumbEncoding, name, emitter, makeOpT32);
+ }
+
+ private static void SetAsimd(string encoding, InstName name, InstEmitter emitter, MakeOp makeOpA32, MakeOp makeOpT32)
+ {
+ SetA32(encoding, name, emitter, makeOpA32);
+
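+ // Remap the A32 ASIMD prefix to its T32 counterpart (11110100 -> 11111001, 1111001x -> 111x1111, and so on).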
+ string thumbEncoding = encoding;
+ if (thumbEncoding.StartsWith("11110100"))
+ {
+ thumbEncoding = $"11111001{encoding.AsSpan(8)}";
+ }
+ else if (thumbEncoding.StartsWith("1111001x"))
+ {
+ thumbEncoding = $"111x1111{encoding.AsSpan(8)}";
+ }
+ else if (thumbEncoding.StartsWith("11110010"))
+ {
+ thumbEncoding = $"11101111{encoding.AsSpan(8)}";
+ }
+ else if (thumbEncoding.StartsWith("11110011"))
+ {
+ thumbEncoding = $"11111111{encoding.AsSpan(8)}";
+ }
+ else
+ {
+ throw new ArgumentException("Invalid ASIMD instruction encoding");
+ }
+ SetT32(thumbEncoding, name, emitter, makeOpT32);
+ }
+
+ private static void SetA64(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp)
+ {
+ Set(encoding, _allInstA64, new InstDescriptor(name, emitter), makeOp);
+ }
+
+ private static void Set(string encoding, List<InstInfo> list, InstDescriptor inst, MakeOp makeOp)
+ {
+ int bit = encoding.Length - 1;
+ int value = 0;
+ int xMask = 0;
+ int xBits = 0;
+
+ int[] xPos = new int[encoding.Length];
+
+ int blacklisted = 0;
+
+ for (int index = 0; index < encoding.Length; index++, bit--)
+ {
+ // Note: < and > are used on special encodings.
+ // The < means that we should never have ALL bits with the '<' set.
+ // So, when the encoding has <<, it means that 00, 01, and 10 are valid,
+ // but not 11. <<< is 000, 001, ..., 110 but NOT 111, and so on...
+ // For >, the invalid value is zero. So, for >> 01, 10 and 11 are valid,
+ // but 00 isn't.
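+ // Each < or > position is expanded like an 'x' below; only the single all-invalid combination (all '<' bits set, all '>' bits clear) is skipped.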
+ char chr = encoding[index];
+
+ if (chr == '1')
+ {
+ value |= 1 << bit;
+ }
+ else if (chr == 'x')
+ {
+ xMask |= 1 << bit;
+ }
+ else if (chr == '>')
+ {
+ xPos[xBits++] = bit;
+ }
+ else if (chr == '<')
+ {
+ xPos[xBits++] = bit;
+
+ blacklisted |= 1 << bit;
+ }
+ else if (chr != '0')
+ {
+ throw new ArgumentException($"Invalid encoding: {encoding}", nameof(encoding));
+ }
+ }
+
+ xMask = ~xMask;
+
+ if (xBits == 0)
+ {
+ list.Add(new InstInfo(xMask, value, inst, makeOp));
+
+ return;
+ }
+
+ for (int index = 0; index < (1 << xBits); index++)
+ {
+ int mask = 0;
+
+ for (int x = 0; x < xBits; x++)
+ {
+ mask |= ((index >> x) & 1) << xPos[x];
+ }
+
+ if (mask != blacklisted)
+ {
+ list.Add(new InstInfo(xMask, value | mask, inst, makeOp));
+ }
+ }
+ }
+
+ public static (InstDescriptor inst, MakeOp makeOp) GetInstA32(int opCode)
+ {
+ return GetInstFromList(_instA32FastLookup[ToFastLookupIndexA(opCode)], opCode);
+ }
+
+ public static (InstDescriptor inst, MakeOp makeOp) GetInstT32(int opCode)
+ {
+ return GetInstFromList(_instT32FastLookup[ToFastLookupIndexT(opCode)], opCode);
+ }
+
+ public static (InstDescriptor inst, MakeOp makeOp) GetInstA64(int opCode)
+ {
+ return GetInstFromList(_instA64FastLookup[ToFastLookupIndexA(opCode)], opCode);
+ }
+
+ private static (InstDescriptor inst, MakeOp makeOp) GetInstFromList(InstInfo[] insts, int opCode)
+ {
+ foreach (InstInfo info in insts)
+ {
+ if ((opCode & info.Mask) == info.Value)
+ {
+ return (info.Inst, info.MakeOp);
+ }
+ }
+
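+ // No encoding matched: treat the opcode as an undefined instruction.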
+ return (new InstDescriptor(InstName.Und, InstEmit.Und), null);
+ }
+
+ private static int ToFastLookupIndexA(int value)
+ {
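+ // Build a 12-bit bucket index from opcode bits [13:10] and [29:22].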
+ return ((value >> 10) & 0x00F) | ((value >> 18) & 0xFF0);
+ }
+
+ private static int ToFastLookupIndexT(int value)
+ {
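+ // Bits [15:4] of the opcode form the 12-bit bucket index.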
+ return (value >> 4) & 0xFFF;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs b/src/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs
new file mode 100644
index 0000000..9d988f0
--- /dev/null
+++ b/src/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs
@@ -0,0 +1,88 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders.Optimizations
+{
+ static class TailCallRemover
+ {
+ public static Block[] RunPass(ulong entryAddress, List<Block> blocks)
+ {
+ // Detect tail calls:
+ // - Assume this function spans the space covered by contiguous code blocks surrounding the entry address.
+ // - A jump to an area outside this contiguous region will be treated as an exit block.
+ // - Include a small allowance for jumps outside the contiguous range.
+
+ if (!Decoder.BinarySearch(blocks, entryAddress, out int entryBlockId))
+ {
+ throw new InvalidOperationException("Function entry point is not contained in a block.");
+ }
+
+ const ulong Allowance = 4;
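+ // Blocks separated by at most this many bytes are still considered contiguous.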
+
+ Block entryBlock = blocks[entryBlockId];
+
+ Block startBlock = entryBlock;
+ Block endBlock = entryBlock;
+
+ int startBlockIndex = entryBlockId;
+ int endBlockIndex = entryBlockId;
+
+ for (int i = entryBlockId + 1; i < blocks.Count; i++) // Search forwards.
+ {
+ Block block = blocks[i];
+
+ if (endBlock.EndAddress < block.Address - Allowance)
+ {
+ break; // End of contiguous function.
+ }
+
+ endBlock = block;
+ endBlockIndex = i;
+ }
+
+ for (int i = entryBlockId - 1; i >= 0; i--) // Search backwards.
+ {
+ Block block = blocks[i];
+
+ if (startBlock.Address > block.EndAddress + Allowance)
+ {
+ break; // End of contiguous function.
+ }
+
+ startBlock = block;
+ startBlockIndex = i;
+ }
+
+ if (startBlockIndex == 0 && endBlockIndex == blocks.Count - 1)
+ {
+ return blocks.ToArray(); // Nothing to do here.
+ }
+
+ // Mark branches whose target is outside of the contiguous region as an exit block.
+ for (int i = startBlockIndex; i <= endBlockIndex; i++)
+ {
+ Block block = blocks[i];
+
+ if (block.Branch != null && (block.Branch.Address > endBlock.EndAddress || block.Branch.EndAddress < startBlock.Address))
+ {
+ block.Branch.Exit = true;
+ }
+ }
+
+ var newBlocks = new List<Block>(blocks.Count);
+
+ // Finally, rebuild decoded block list, ignoring blocks outside the contiguous range.
+ for (int i = 0; i < blocks.Count; i++)
+ {
+ Block block = blocks[i];
+
+ if (block.Exit || (i >= startBlockIndex && i <= endBlockIndex))
+ {
+ newBlocks.Add(block);
+ }
+ }
+
+ return newBlocks.ToArray();
+ }
+ }
+}
diff --git a/src/ARMeilleure/Decoders/RegisterSize.cs b/src/ARMeilleure/Decoders/RegisterSize.cs
new file mode 100644
index 0000000..7c00984
--- /dev/null
+++ b/src/ARMeilleure/Decoders/RegisterSize.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ enum RegisterSize
+ {
+ Int32,
+ Int64,
+ Simd64,
+ Simd128,
+ }
+}
diff --git a/src/ARMeilleure/Decoders/ShiftType.cs b/src/ARMeilleure/Decoders/ShiftType.cs
new file mode 100644
index 0000000..43b738f
--- /dev/null
+++ b/src/ARMeilleure/Decoders/ShiftType.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ enum ShiftType
+ {
+ Lsl = 0,
+ Lsr = 1,
+ Asr = 2,
+ Ror = 3,
+ }
+}
diff --git a/src/ARMeilleure/Diagnostics/IRDumper.cs b/src/ARMeilleure/Diagnostics/IRDumper.cs
new file mode 100644
index 0000000..16833d0
--- /dev/null
+++ b/src/ARMeilleure/Diagnostics/IRDumper.cs
@@ -0,0 +1,327 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ARMeilleure.Diagnostics
+{
+ class IRDumper
+ {
+ private const string Indentation = " ";
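+ // Produces listings along the lines of:
+ //   block0 (block1):
+ //     i32 %0 = Add i32 r0, i32 0x1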
+
+ private int _indentLevel;
+
+ private readonly StringBuilder _builder;
+
+ private readonly Dictionary<Operand, string> _localNames;
+ private readonly Dictionary<ulong, string> _symbolNames;
+
+ public IRDumper(int indent)
+ {
+ _indentLevel = indent;
+
+ _builder = new StringBuilder();
+
+ _localNames = new Dictionary<Operand, string>();
+ _symbolNames = new Dictionary<ulong, string>();
+ }
+
+ private void Indent()
+ {
+ _builder.EnsureCapacity(_builder.Capacity + _indentLevel * Indentation.Length);
+
+ for (int index = 0; index < _indentLevel; index++)
+ {
+#pragma warning disable CA1834 // Use StringBuilder.Append(char) for single character strings
+ _builder.Append(Indentation);
+#pragma warning restore CA1834
+ }
+ }
+
+ private void IncreaseIndentation()
+ {
+ _indentLevel++;
+ }
+
+ private void DecreaseIndentation()
+ {
+ _indentLevel--;
+ }
+
+ private void DumpBlockName(BasicBlock block)
+ {
+ _builder.Append("block").Append(block.Index);
+ }
+
+ private void DumpBlockHeader(BasicBlock block)
+ {
+ DumpBlockName(block);
+
+ if (block.Frequency == BasicBlockFrequency.Cold)
+ {
+ _builder.Append(" cold");
+ }
+
+ if (block.SuccessorsCount > 0)
+ {
+ _builder.Append(" (");
+
+ for (int i = 0; i < block.SuccessorsCount; i++)
+ {
+ DumpBlockName(block.GetSuccessor(i));
+
+ if (i < block.SuccessorsCount - 1)
+ {
+ _builder.Append(", ");
+ }
+ }
+
+ _builder.Append(')');
+ }
+
+ _builder.Append(':');
+ }
+
+ private void DumpOperand(Operand operand)
+ {
+ if (operand == default)
+ {
+ _builder.Append("");
+ return;
+ }
+
+ _builder.Append(GetTypeName(operand.Type)).Append(' ');
+
+ switch (operand.Kind)
+ {
+ case OperandKind.LocalVariable:
+ if (!_localNames.TryGetValue(operand, out string localName))
+ {
+ localName = $"%{_localNames.Count}";
+
+ _localNames.Add(operand, localName);
+ }
+
+ _builder.Append(localName);
+ break;
+
+ case OperandKind.Register:
+ Register reg = operand.GetRegister();
+
+ switch (reg.Type)
+ {
+ case RegisterType.Flag:
+ _builder.Append('b');
+ break;
+ case RegisterType.FpFlag:
+ _builder.Append('f');
+ break;
+ case RegisterType.Integer:
+ _builder.Append('r');
+ break;
+ case RegisterType.Vector:
+ _builder.Append('v');
+ break;
+ }
+
+ _builder.Append(reg.Index);
+ break;
+
+ case OperandKind.Constant:
+ string symbolName = Symbols.Get(operand.Value);
+
+ if (symbolName != null && !_symbolNames.ContainsKey(operand.Value))
+ {
+ _symbolNames.Add(operand.Value, symbolName);
+ }
+
+ _builder.Append("0x").Append(operand.Value.ToString("X"));
+ break;
+
+ case OperandKind.Memory:
+ var memOp = operand.GetMemory();
+
+ _builder.Append('[');
+
+ DumpOperand(memOp.BaseAddress);
+
+ if (memOp.Index != default)
+ {
+ _builder.Append(" + ");
+
+ DumpOperand(memOp.Index);
+
+ switch (memOp.Scale)
+ {
+ case Multiplier.x2:
+ _builder.Append("*2");
+ break;
+ case Multiplier.x4:
+ _builder.Append("*4");
+ break;
+ case Multiplier.x8:
+ _builder.Append("*8");
+ break;
+ }
+ }
+
+ if (memOp.Displacement != 0)
+ {
+ _builder.Append(" + 0x").Append(memOp.Displacement.ToString("X"));
+ }
+
+ _builder.Append(']');
+ break;
+
+ default:
+ _builder.Append(operand.Type);
+ break;
+ }
+ }
+
+ private void DumpNode(ControlFlowGraph cfg, Operation node)
+ {
+ for (int index = 0; index < node.DestinationsCount; index++)
+ {
+ DumpOperand(node.GetDestination(index));
+
+ if (index == node.DestinationsCount - 1)
+ {
+ _builder.Append(" = ");
+ }
+ else
+ {
+ _builder.Append(", ");
+ }
+ }
+
+ switch (node)
+ {
+ case Operation operation:
+ if (operation.Instruction == Instruction.Phi)
+ {
+ PhiOperation phi = operation.AsPhi();
+
+ _builder.Append("Phi ");
+
+ for (int index = 0; index < phi.SourcesCount; index++)
+ {
+ _builder.Append('(');
+
+ DumpBlockName(phi.GetBlock(cfg, index));
+
+ _builder.Append(": ");
+
+ DumpOperand(phi.GetSource(index));
+
+ _builder.Append(')');
+
+ if (index < phi.SourcesCount - 1)
+ {
+ _builder.Append(", ");
+ }
+ }
+
+ break;
+ }
+
+ bool comparison = false;
+
+ _builder.Append(operation.Instruction);
+
+ if (operation.Instruction == Instruction.Extended)
+ {
+ _builder.Append('.').Append(operation.Intrinsic);
+ }
+ else if (operation.Instruction == Instruction.BranchIf ||
+ operation.Instruction == Instruction.Compare)
+ {
+ comparison = true;
+ }
+
+ _builder.Append(' ');
+
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ Operand source = operation.GetSource(index);
+
+ if (index < operation.SourcesCount - 1)
+ {
+ DumpOperand(source);
+
+ _builder.Append(", ");
+ }
+ else if (comparison)
+ {
+ _builder.Append((Comparison)source.AsInt32());
+ }
+ else
+ {
+ DumpOperand(source);
+ }
+ }
+ break;
+ }
+
+ if (_symbolNames.Count == 1)
+ {
+ _builder.Append(" ;; ").Append(_symbolNames.First().Value);
+ }
+ else if (_symbolNames.Count > 1)
+ {
+ _builder.Append(" ;;");
+
+ foreach ((ulong value, string name) in _symbolNames)
+ {
+ _builder.Append(" 0x").Append(value.ToString("X")).Append(" = ").Append(name);
+ }
+ }
+
+ // Reset the set of symbols for the next Node we're going to dump.
+ _symbolNames.Clear();
+ }
+
+ public static string GetDump(ControlFlowGraph cfg)
+ {
+ var dumper = new IRDumper(1);
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ dumper.Indent();
+ dumper.DumpBlockHeader(block);
+
+ dumper._builder.AppendLine();
+
+ dumper.IncreaseIndentation();
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ dumper.Indent();
+ dumper.DumpNode(cfg, node);
+
+ dumper._builder.AppendLine();
+ }
+
+ dumper.DecreaseIndentation();
+ }
+
+ return dumper._builder.ToString();
+ }
+
+ private static string GetTypeName(OperandType type)
+ {
+ return type switch
+ {
+ OperandType.None => "none",
+ OperandType.I32 => "i32",
+ OperandType.I64 => "i64",
+ OperandType.FP32 => "f32",
+ OperandType.FP64 => "f64",
+ OperandType.V128 => "v128",
+ _ => throw new ArgumentException($"Invalid operand type \"{type}\"."),
+ };
+ }
+ }
+}
diff --git a/src/ARMeilleure/Diagnostics/Logger.cs b/src/ARMeilleure/Diagnostics/Logger.cs
new file mode 100644
index 0000000..d7f6123
--- /dev/null
+++ b/src/ARMeilleure/Diagnostics/Logger.cs
@@ -0,0 +1,56 @@
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.Diagnostics
+{
+ static class Logger
+ {
+ private static long _startTime;
+
+ private static readonly long[] _accumulatedTime;
+
+ static Logger()
+ {
+ _accumulatedTime = new long[(int)PassName.Count];
+ }
+
+ [Conditional("M_DEBUG")]
+ public static void StartPass(PassName name)
+ {
+ WriteOutput(name + " pass started...");
+
+ _startTime = Stopwatch.GetTimestamp();
+ }
+
+ [Conditional("M_DEBUG")]
+ public static void EndPass(PassName name, ControlFlowGraph cfg)
+ {
+ EndPass(name);
+
+ WriteOutput("IR after " + name + " pass:");
+
+ WriteOutput(IRDumper.GetDump(cfg));
+ }
+
+ [Conditional("M_DEBUG")]
+ public static void EndPass(PassName name)
+ {
+ long elapsedTime = Stopwatch.GetTimestamp() - _startTime;
+
+ _accumulatedTime[(int)name] += elapsedTime;
+
+ WriteOutput($"{name} pass ended after {GetMilliseconds(_accumulatedTime[(int)name])} ms...");
+ }
+
+ private static long GetMilliseconds(long ticks)
+ {
+ return (long)(((double)ticks / Stopwatch.Frequency) * 1000);
+ }
+
+ private static void WriteOutput(string text)
+ {
+ Console.WriteLine(text);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Diagnostics/PassName.cs b/src/ARMeilleure/Diagnostics/PassName.cs
new file mode 100644
index 0000000..2d87659
--- /dev/null
+++ b/src/ARMeilleure/Diagnostics/PassName.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Diagnostics
+{
+ enum PassName
+ {
+ Decoding,
+ Translation,
+ RegisterUsage,
+ TailMerge,
+ Dominance,
+ SsaConstruction,
+ RegisterToLocal,
+ Optimization,
+ PreAllocation,
+ RegisterAllocation,
+ CodeGeneration,
+
+ Count,
+ }
+}
diff --git a/src/ARMeilleure/Diagnostics/Symbols.cs b/src/ARMeilleure/Diagnostics/Symbols.cs
new file mode 100644
index 0000000..be74d2b
--- /dev/null
+++ b/src/ARMeilleure/Diagnostics/Symbols.cs
@@ -0,0 +1,85 @@
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Text;
+
+namespace ARMeilleure.Diagnostics
+{
+ static class Symbols
+ {
+ private readonly struct RangedSymbol
+ {
+ public readonly ulong Start;
+ public readonly ulong End;
+ public readonly ulong ElementSize;
+ public readonly string Name;
+
+ public RangedSymbol(ulong start, ulong end, ulong elemSize, string name)
+ {
+ Start = start;
+ End = end;
+ ElementSize = elemSize;
+ Name = name;
+ }
+ }
+
+ private static readonly ConcurrentDictionary<ulong, string> _symbols;
+ private static readonly List<RangedSymbol> _rangedSymbols;
+
+ static Symbols()
+ {
+ _symbols = new ConcurrentDictionary<ulong, string>();
+ _rangedSymbols = new List<RangedSymbol>();
+ }
+
+ public static string Get(ulong address)
+ {
+ if (_symbols.TryGetValue(address, out string result))
+ {
+ return result;
+ }
+
+ lock (_rangedSymbols)
+ {
+ foreach (RangedSymbol symbol in _rangedSymbols)
+ {
+ if (address >= symbol.Start && address <= symbol.End)
+ {
+ ulong diff = address - symbol.Start;
+ ulong rem = diff % symbol.ElementSize;
+
+ StringBuilder resultBuilder = new();
+ resultBuilder.Append($"{symbol.Name}_{diff / symbol.ElementSize}");
+
+ if (rem != 0)
+ {
+ resultBuilder.Append($"+{rem}");
+ }
+
+ result = resultBuilder.ToString();
+ _symbols.TryAdd(address, result);
+
+ return result;
+ }
+ }
+ }
+
+ return null;
+ }
+
+ [Conditional("M_DEBUG")]
+ public static void Add(ulong address, string name)
+ {
+ _symbols.TryAdd(address, name);
+ }
+
+ [Conditional("M_DEBUG")]
+ public static void Add(ulong address, ulong size, ulong elemSize, string name)
+ {
+ lock (_rangedSymbols)
+ {
+ _rangedSymbols.Add(new RangedSymbol(address, address + size, elemSize, name));
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Diagnostics/TranslatorEventSource.cs b/src/ARMeilleure/Diagnostics/TranslatorEventSource.cs
new file mode 100644
index 0000000..2e1be8c
--- /dev/null
+++ b/src/ARMeilleure/Diagnostics/TranslatorEventSource.cs
@@ -0,0 +1,67 @@
+using System.Diagnostics.Tracing;
+using System.Threading;
+
+namespace ARMeilleure.Diagnostics
+{
+ [EventSource(Name = "ARMeilleure")]
+ class TranslatorEventSource : EventSource
+ {
+ public static readonly TranslatorEventSource Log = new();
+
+ private int _rejitQueue;
+ private ulong _funcTabSize;
+ private ulong _funcTabLeafSize;
+ private PollingCounter _rejitQueueCounter;
+ private PollingCounter _funcTabSizeCounter;
+ private PollingCounter _funcTabLeafSizeCounter;
+
+ public TranslatorEventSource()
+ {
+ _rejitQueueCounter = new PollingCounter("rejit-queue-length", this, () => _rejitQueue)
+ {
+ DisplayName = "Rejit Queue Length",
+ };
+
+ _funcTabSizeCounter = new PollingCounter("addr-tab-alloc", this, () => _funcTabSize / 1024d / 1024d)
+ {
+ DisplayName = "AddressTable Total Bytes Allocated",
+ DisplayUnits = "MiB",
+ };
+
+ _funcTabLeafSizeCounter = new PollingCounter("addr-tab-leaf-alloc", this, () => _funcTabLeafSize / 1024d / 1024d)
+ {
+ DisplayName = "AddressTable Total Leaf Bytes Allocated",
+ DisplayUnits = "MiB",
+ };
+ }
+
+ public void RejitQueueAdd(int count)
+ {
+ Interlocked.Add(ref _rejitQueue, count);
+ }
+
+ public void AddressTableAllocated(int bytes, bool leaf)
+ {
+ _funcTabSize += (uint)bytes;
+
+ if (leaf)
+ {
+ _funcTabLeafSize += (uint)bytes;
+ }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ _rejitQueueCounter.Dispose();
+ _rejitQueueCounter = null;
+
+ _funcTabLeafSizeCounter.Dispose();
+ _funcTabLeafSizeCounter = null;
+
+ _funcTabSizeCounter.Dispose();
+ _funcTabSizeCounter = null;
+
+ base.Dispose(disposing);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/CryptoHelper.cs b/src/ARMeilleure/Instructions/CryptoHelper.cs
new file mode 100644
index 0000000..ba68ceb
--- /dev/null
+++ b/src/ARMeilleure/Instructions/CryptoHelper.cs
@@ -0,0 +1,282 @@
+// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
+
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class CryptoHelper
+ {
+ #region "LookUp Tables"
+#pragma warning disable IDE1006 // Naming rule violation
+ private static ReadOnlySpan<byte> _sBox => new byte[]
+ {
+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
+ };
+
+ private static ReadOnlySpan<byte> _invSBox => new byte[]
+ {
+ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
+ };
+
+ private static ReadOnlySpan<byte> _gfMul02 => new byte[]
+ {
+ 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
+ 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
+ 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+ 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
+ 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
+ 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
+ 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
+ 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
+ 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
+ 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
+ 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
+ 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
+ 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
+ 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
+ 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
+ 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5,
+ };
+
+ private static ReadOnlySpan<byte> _gfMul03 => new byte[]
+ {
+ 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
+ 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
+ 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
+ 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
+ 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
+ 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
+ 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
+ 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
+ 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
+ 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
+ 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
+ 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
+ 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
+ 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
+ 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
+ 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a,
+ };
+
+ private static ReadOnlySpan<byte> _gfMul09 => new byte[]
+ {
+ 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
+ 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
+ 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
+ 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
+ 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
+ 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
+ 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
+ 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
+ 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
+ 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
+ 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
+ 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
+ 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
+ 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
+ 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
+ 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46,
+ };
+
+ private static ReadOnlySpan<byte> _gfMul0B => new byte[]
+ {
+ 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
+ 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
+ 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
+ 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
+ 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
+ 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
+ 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
+ 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
+ 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
+ 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
+ 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
+ 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
+ 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
+ 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
+ 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
+ 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3,
+ };
+
+ private static ReadOnlySpan<byte> _gfMul0D => new byte[]
+ {
+ 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
+ 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
+ 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
+ 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
+ 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
+ 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
+ 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
+ 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
+ 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
+ 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
+ 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
+ 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
+ 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
+ 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
+ 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
+ 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97,
+ };
+
+ private static ReadOnlySpan<byte> _gfMul0E => new byte[]
+ {
+ 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
+ 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
+ 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
+ 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
+ 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
+ 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
+ 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
+ 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
+ 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
+ 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
+ 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
+ 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
+ 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
+ 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
+ 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
+ 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d,
+ };
+
+ private static ReadOnlySpan<byte> _srPerm => new byte[]
+ {
+ 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3,
+ };
+
+ private static ReadOnlySpan<byte> _isrPerm => new byte[]
+ {
+ 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11,
+ };
+#pragma warning restore IDE1006
+ #endregion
+
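+ // (Inv)MixColumns multiplies each 4-byte column of the AES state by a fixed matrix over GF(2^8);
+ // the _gfMulXX tables above precompute multiplication by the matrix coefficients.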
+ public static V128 AesInvMixColumns(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int columns = 0; columns <= 3; columns++)
+ {
+ int idx = columns << 2;
+
+ byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3]
+ byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3]
+ byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3]
+ byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3]
+
+ outState[idx + 0] = (byte)((uint)_gfMul0E[row0] ^ _gfMul0B[row1] ^ _gfMul0D[row2] ^ _gfMul09[row3]);
+ outState[idx + 1] = (byte)((uint)_gfMul09[row0] ^ _gfMul0E[row1] ^ _gfMul0B[row2] ^ _gfMul0D[row3]);
+ outState[idx + 2] = (byte)((uint)_gfMul0D[row0] ^ _gfMul09[row1] ^ _gfMul0E[row2] ^ _gfMul0B[row3]);
+ outState[idx + 3] = (byte)((uint)_gfMul0B[row0] ^ _gfMul0D[row1] ^ _gfMul09[row2] ^ _gfMul0E[row3]);
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesInvShiftRows(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[_isrPerm[idx]] = inState[idx];
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesInvSubBytes(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[idx] = _invSBox[inState[idx]];
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesMixColumns(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int columns = 0; columns <= 3; columns++)
+ {
+ int idx = columns << 2;
+
+ byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3]
+ byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3]
+ byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3]
+ byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3]
+
+ outState[idx + 0] = (byte)((uint)_gfMul02[row0] ^ _gfMul03[row1] ^ row2 ^ row3);
+ outState[idx + 1] = (byte)((uint)row0 ^ _gfMul02[row1] ^ _gfMul03[row2] ^ row3);
+ outState[idx + 2] = (byte)((uint)row0 ^ row1 ^ _gfMul02[row2] ^ _gfMul03[row3]);
+ outState[idx + 3] = (byte)((uint)_gfMul03[row0] ^ row1 ^ row2 ^ _gfMul02[row3]);
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesShiftRows(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[_srPerm[idx]] = inState[idx];
+ }
+
+ return new V128(outState);
+ }
+
+ public static V128 AesSubBytes(V128 op)
+ {
+ byte[] inState = op.ToArray();
+ byte[] outState = new byte[16];
+
+ for (int idx = 0; idx <= 15; idx++)
+ {
+ outState[idx] = _sBox[inState[idx]];
+ }
+
+ return new V128(outState);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitAlu.cs b/src/ARMeilleure/Instructions/InstEmitAlu.cs
new file mode 100644
index 0000000..ac17c32
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitAlu.cs
@@ -0,0 +1,399 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Adc(ArmEmitterContext context) => EmitAdc(context, setFlags: false);
+ public static void Adcs(ArmEmitterContext context) => EmitAdc(context, setFlags: true);
+
+ private static void EmitAdc(ArmEmitterContext context, bool setFlags)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.Add(n, m);
+
+ Operand carry = GetFlag(PState.CFlag);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int64)
+ {
+ carry = context.ZeroExtend32(OperandType.I64, carry);
+ }
+
+ d = context.Add(d, carry);
+
+ if (setFlags)
+ {
+ EmitNZFlagsCheck(context, d);
+
+ EmitAdcsCCheck(context, n, d);
+ EmitAddsVCheck(context, n, m, d);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Add(ArmEmitterContext context)
+ {
+ SetAluD(context, context.Add(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Adds(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ context.MarkComparison(n, m);
+
+ Operand d = context.Add(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitAddsCCheck(context, n, d);
+ EmitAddsVCheck(context, n, m, d);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void And(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseAnd(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Ands(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseAnd(n, m);
+
+ EmitNZFlagsCheck(context, d);
+ EmitCVFlagsClear(context);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Asrv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftRightSI(GetAluN(context), GetAluMShift(context)));
+ }
+
+ public static void Bic(ArmEmitterContext context) => EmitBic(context, setFlags: false);
+ public static void Bics(ArmEmitterContext context) => EmitBic(context, setFlags: true);
+
+ private static void EmitBic(ArmEmitterContext context, bool setFlags)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseAnd(n, context.BitwiseNot(m));
+
+ if (setFlags)
+ {
+ EmitNZFlagsCheck(context, d);
+ EmitCVFlagsClear(context);
+ }
+
+ SetAluD(context, d, setFlags);
+ }
+
+ public static void Cls(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
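+ // CLS(n) == CLZ((n >>> 1) ^ (n with the sign bit cleared)) - 1: the XOR flags the first bit that differs from the sign bit.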
+ Operand nHigh = context.ShiftRightUI(n, Const(1));
+
+ bool is32Bits = op.RegisterSize == RegisterSize.Int32;
+
+ Operand mask = is32Bits ? Const(int.MaxValue) : Const(long.MaxValue);
+
+ Operand nLow = context.BitwiseAnd(n, mask);
+
+ Operand res = context.CountLeadingZeros(context.BitwiseExclusiveOr(nHigh, nLow));
+
+ res = context.Subtract(res, Const(res.Type, 1));
+
+ SetAluDOrZR(context, res);
+ }
+
+ public static void Clz(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ Operand d = context.CountLeadingZeros(n);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Eon(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseExclusiveOr(n, context.BitwiseNot(m));
+
+ SetAluD(context, d);
+ }
+
+ public static void Eor(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseExclusiveOr(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Extr(ArmEmitterContext context)
+ {
+ OpCodeAluRs op = (OpCodeAluRs)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rm);
+
+ if (op.Shift != 0)
+ {
+ if (op.Rn == op.Rm)
+ {
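+ // EXTR with Rn == Rm is the ROR (rotate right) alias.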
+ res = context.RotateRight(res, Const(op.Shift));
+ }
+ else
+ {
+ res = context.ShiftRightUI(res, Const(op.Shift));
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ int invShift = op.GetBitsCount() - op.Shift;
+
+ res = context.BitwiseOr(res, context.ShiftLeft(n, Const(invShift)));
+ }
+ }
+
+ SetAluDOrZR(context, res);
+ }
+
+ public static void Lslv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftLeft(GetAluN(context), GetAluMShift(context)));
+ }
+
+ public static void Lsrv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.ShiftRightUI(GetAluN(context), GetAluMShift(context)));
+ }
+
+ public static void Sbc(ArmEmitterContext context) => EmitSbc(context, setFlags: false);
+ public static void Sbcs(ArmEmitterContext context) => EmitSbc(context, setFlags: true);
+
+ private static void EmitSbc(ArmEmitterContext context, bool setFlags)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.Subtract(n, m);
+
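+ // Arm uses the "carry means no borrow" convention, so the borrow-in is the inverted carry flag.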
+ Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int64)
+ {
+ borrow = context.ZeroExtend32(OperandType.I64, borrow);
+ }
+
+ d = context.Subtract(d, borrow);
+
+ if (setFlags)
+ {
+ EmitNZFlagsCheck(context, d);
+
+ EmitSbcsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, d);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Sub(ArmEmitterContext context)
+ {
+ SetAluD(context, context.Subtract(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Subs(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ context.MarkComparison(n, m);
+
+ Operand d = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitSubsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, d);
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Orn(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand d = context.BitwiseOr(n, context.BitwiseNot(m));
+
+ SetAluD(context, d);
+ }
+
+ public static void Orr(ArmEmitterContext context)
+ {
+ SetAluD(context, context.BitwiseOr(GetAluN(context), GetAluM(context)));
+ }
+
+ public static void Rbit(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand d;
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ d = EmitReverseBits32Op(context, n);
+ }
+ else
+ {
+ d = EmitReverseBits64Op(context, n);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ private static Operand EmitReverseBits64Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
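+ // Classic bit reversal: swap adjacent bits, then pairs, nibbles, bytes, halfwords, and finally the two words.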
+ Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaaaaaaaaaaul)), Const(1)),
+ context.ShiftLeft(context.BitwiseAnd(op, Const(0x5555555555555555ul)), Const(1)));
+
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccccccccccul)), Const(2)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x3333333333333333ul)), Const(2)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0f0f0f0f0ul)), Const(4)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x0f0f0f0f0f0f0f0ful)), Const(4)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00ff00ff00ul)), Const(8)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x00ff00ff00ff00fful)), Const(8)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xffff0000ffff0000ul)), Const(16)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x0000ffff0000fffful)), Const(16)));
+
+ return context.BitwiseOr(context.ShiftRightUI(val, Const(32)), context.ShiftLeft(val, Const(32)));
+ }
+
+ public static void Rev16(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand d;
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ d = EmitReverseBytes16_32Op(context, n);
+ }
+ else
+ {
+ d = EmitReverseBytes16_64Op(context, n);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ public static void Rev32(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand d;
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ d = context.ByteSwap(n);
+ }
+ else
+ {
+ d = EmitReverseBytes32_64Op(context, n);
+ }
+
+ SetAluDOrZR(context, d);
+ }
+
+ private static Operand EmitReverseBytes32_64Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
+ Operand val = EmitReverseBytes16_64Op(context, op);
+
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xffff0000ffff0000ul)), Const(16)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x0000ffff0000fffful)), Const(16)));
+ }
+
+ public static void Rev64(ArmEmitterContext context)
+ {
+ OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+ SetAluDOrZR(context, context.ByteSwap(GetIntOrZR(context, op.Rn)));
+ }
+
+ public static void Rorv(ArmEmitterContext context)
+ {
+ SetAluDOrZR(context, context.RotateRight(GetAluN(context), GetAluMShift(context)));
+ }
+
+ private static Operand GetAluMShift(ArmEmitterContext context)
+ {
+ IOpCodeAluRs op = (IOpCodeAluRs)context.CurrOp;
+
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Int64)
+ {
+ m = context.ConvertI64ToI32(m);
+ }
+
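+            // The shift amount is taken modulo the register width (mask of 31 or 63),
+            // as the variable shift instructions require.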
+ return context.BitwiseAnd(m, Const(context.CurrOp.GetBitsCount() - 1));
+ }
+
+ private static void EmitCVFlagsClear(ArmEmitterContext context)
+ {
+ SetFlag(context, PState.CFlag, Const(0));
+ SetFlag(context, PState.VFlag, Const(0));
+ }
+
+ public static void SetAluD(ArmEmitterContext context, Operand d)
+ {
+ SetAluD(context, d, x31IsZR: false);
+ }
+
+ public static void SetAluDOrZR(ArmEmitterContext context, Operand d)
+ {
+ SetAluD(context, d, x31IsZR: true);
+ }
+
+ public static void SetAluD(ArmEmitterContext context, Operand d, bool x31IsZR)
+ {
+ IOpCodeAlu op = (IOpCodeAlu)context.CurrOp;
+
+ if ((x31IsZR || op is IOpCodeAluRs) && op.Rd == RegisterConsts.ZeroIndex)
+ {
+ return;
+ }
+
+ SetIntOrSP(context, op.Rd, d);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitAlu32.cs b/src/ARMeilleure/Instructions/InstEmitAlu32.cs
new file mode 100644
index 0000000..8eabe09
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -0,0 +1,1241 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ [SuppressMessage("Style", "IDE0059: Remove unnecessary value assignment")]
+ static partial class InstEmit32
+ {
+ public static void Add(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ if (op.Rn == RegisterAlias.Aarch32Pc && op is OpCodeT32AluImm12)
+ {
+                // For ADR, PC is always 4-byte aligned, even in Thumb mode.
+ n = context.BitwiseAnd(n, Const(~3u));
+ }
+
+ Operand res = context.Add(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitAddsCCheck(context, n, res);
+ EmitAddsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Adc(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Add(n, m);
+
+ Operand carry = GetFlag(PState.CFlag);
+
+ res = context.Add(res, carry);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitAdcsCCheck(context, n, res);
+ EmitAddsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void And(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseAnd(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Bfc(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+ Operand d = GetIntA32(context, op.Rd);
+ Operand res = context.BitwiseAnd(d, Const(~op.DestMask));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Bfi(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand d = GetIntA32(context, op.Rd);
+ Operand part = context.BitwiseAnd(n, Const(op.SourceMask));
+
+ if (op.Lsb != 0)
+ {
+ part = context.ShiftLeft(part, Const(op.Lsb));
+ }
+
+ Operand res = context.BitwiseAnd(d, Const(~op.DestMask));
+ res = context.BitwiseOr(res, context.BitwiseAnd(part, Const(op.DestMask)));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Bic(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseAnd(n, context.BitwiseNot(m));
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Clz(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.CountLeadingZeros(m);
+ EmitAluStore(context, res);
+ }
+
+ public static void Cmp(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, n, res);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ public static void Cmn(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Add(n, m);
+
+ EmitNZFlagsCheck(context, res);
+
+ EmitAddsCCheck(context, n, res);
+ EmitAddsVCheck(context, n, m, res);
+ }
+
+ public static void Eor(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseExclusiveOr(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mov(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand m = GetAluM(context);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, m);
+ }
+
+ EmitAluStore(context, m);
+ }
+
+ public static void Movt(ArmEmitterContext context)
+ {
+ IOpCode32AluImm16 op = (IOpCode32AluImm16)context.CurrOp;
+
+ Operand d = GetIntA32(context, op.Rd);
+            Operand imm = Const(op.Immediate << 16); // Immediate value as the top halfword.
+ Operand res = context.BitwiseAnd(d, Const(0x0000ffff));
+ res = context.BitwiseOr(res, imm);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mul(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.Multiply(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mvn(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseNot(m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Orr(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseOr(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Orn(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseOr(n, context.BitwiseNot(m));
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Pkh(ArmEmitterContext context)
+ {
+ OpCode32AluRsImm op = (OpCode32AluRsImm)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res;
+
+ bool tbform = op.ShiftType == ShiftType.Asr;
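+            // PKHTB (ASR shift) packs the top half of Rn with the bottom half of the
+            // shifted Rm; PKHBT (the else case) packs the other way around.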
+ if (tbform)
+ {
+ res = context.BitwiseOr(context.BitwiseAnd(n, Const(0xFFFF0000)), context.BitwiseAnd(m, Const(0xFFFF)));
+ }
+ else
+ {
+ res = context.BitwiseOr(context.BitwiseAnd(m, Const(0xFFFF0000)), context.BitwiseAnd(n, Const(0xFFFF)));
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Qadd16(ArmEmitterContext context)
+ {
+ OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
+
+ SetIntA32(context, op.Rd, EmitSigned16BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
+ {
+ EmitSaturateRange(context, d, context.Add(n, m), 16, unsigned: false, setQ: false);
+ }));
+ }
+
+ public static void Rbit(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = EmitReverseBits32Op(context, m);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rev(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = context.ByteSwap(m);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rev16(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = EmitReverseBytes16_32Op(context, m);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Revsh(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = EmitReverseBytes16_32Op(context, m);
+
+ EmitAluStore(context, context.SignExtend16(OperandType.I32, res));
+ }
+
+ public static void Rsc(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(m, n);
+
+ Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));
+
+ res = context.Subtract(res, borrow);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSbcsCCheck(context, m, n);
+ EmitSubsVCheck(context, m, n, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rsb(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(m, n);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, m, res);
+ EmitSubsVCheck(context, m, n, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Sadd8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: true, unsigned: false);
+ }
+
+ public static void Sbc(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Subtract(n, m);
+
+ Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));
+
+ res = context.Subtract(res, borrow);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSbcsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Sbfx(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+            var msb = op.Lsb + op.Msb; // For this instruction, the Msb field actually encodes the width minus one.
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Sdiv(ArmEmitterContext context)
+ {
+ EmitDiv(context, unsigned: false);
+ }
+
+ public static void Sel(ArmEmitterContext context)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ Operand ge0 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE0Flag)));
+ Operand ge1 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE1Flag)));
+ Operand ge2 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE2Flag)));
+ Operand ge3 = context.Negate(GetFlag(PState.GE3Flag));
+
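+            // Negate turns each 0/1 GE flag into 0 or -1, giving a 0x00 or 0xFF byte
+            // for its lane, so the mask below picks n where GE is set and m elsewhere.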
+ Operand mask = context.BitwiseOr(ge0, context.ShiftLeft(ge1, Const(8)));
+ mask = context.BitwiseOr(mask, context.ShiftLeft(ge2, Const(16)));
+ mask = context.BitwiseOr(mask, context.ShiftLeft(ge3, Const(24)));
+
+ Operand res = context.BitwiseOr(context.BitwiseAnd(n, mask), context.BitwiseAnd(m, context.BitwiseNot(mask)));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Shadd8(ArmEmitterContext context)
+ {
+ EmitHadd8(context, unsigned: false);
+ }
+
+ public static void Shsub8(ArmEmitterContext context)
+ {
+ EmitHsub8(context, unsigned: false);
+ }
+
+ public static void Ssat(ArmEmitterContext context)
+ {
+ OpCode32Sat op = (OpCode32Sat)context.CurrOp;
+
+ EmitSat(context, -(1 << op.SatImm), (1 << op.SatImm) - 1);
+ }
+
+ public static void Ssat16(ArmEmitterContext context)
+ {
+ OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp;
+
+ EmitSat16(context, -(1 << op.SatImm), (1 << op.SatImm) - 1);
+ }
+
+ public static void Ssub8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: false, unsigned: false);
+ }
+
+ public static void Sub(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ if (op.Rn == RegisterAlias.Aarch32Pc && op is OpCodeT32AluImm12)
+ {
+            // For ADR, PC is always 4-byte aligned, even in Thumb mode.
+ n = context.BitwiseAnd(n, Const(~3u));
+ }
+
+ Operand res = context.Subtract(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, n, res);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Sxtb(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, true, 8);
+ }
+
+ public static void Sxtb16(ArmEmitterContext context)
+ {
+ EmitExtend16(context, true);
+ }
+
+ public static void Sxth(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, true, 16);
+ }
+
+ public static void Teq(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseExclusiveOr(n, m);
+
+ EmitNZFlagsCheck(context, res);
+ }
+
+ public static void Tst(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseAnd(n, m);
+ EmitNZFlagsCheck(context, res);
+ }
+
+ public static void Uadd8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: true, unsigned: true);
+ }
+
+ public static void Ubfx(ArmEmitterContext context)
+ {
+ IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp;
+
+        var msb = op.Lsb + op.Msb; // For this instruction, the Msb field actually encodes the width minus one.
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb));
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ public static void Udiv(ArmEmitterContext context)
+ {
+ EmitDiv(context, unsigned: true);
+ }
+
+ public static void Uhadd8(ArmEmitterContext context)
+ {
+ EmitHadd8(context, unsigned: true);
+ }
+
+ public static void Uhsub8(ArmEmitterContext context)
+ {
+ EmitHsub8(context, unsigned: true);
+ }
+
+ public static void Uqadd16(ArmEmitterContext context)
+ {
+ OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
+
+ SetIntA32(context, op.Rd, EmitUnsigned16BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
+ {
+ EmitSaturateUqadd(context, d, context.Add(n, m), 16);
+ }));
+ }
+
+ public static void Uqadd8(ArmEmitterContext context)
+ {
+ OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
+
+ SetIntA32(context, op.Rd, EmitUnsigned8BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
+ {
+ EmitSaturateUqadd(context, d, context.Add(n, m), 8);
+ }));
+ }
+
+ public static void Uqsub16(ArmEmitterContext context)
+ {
+ OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
+
+ SetIntA32(context, op.Rd, EmitUnsigned16BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
+ {
+ EmitSaturateUqsub(context, d, context.Subtract(n, m), 16);
+ }));
+ }
+
+ public static void Uqsub8(ArmEmitterContext context)
+ {
+ OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
+
+ SetIntA32(context, op.Rd, EmitUnsigned8BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
+ {
+ EmitSaturateUqsub(context, d, context.Subtract(n, m), 8);
+ }));
+ }
+
+ public static void Usat(ArmEmitterContext context)
+ {
+ OpCode32Sat op = (OpCode32Sat)context.CurrOp;
+
+ EmitSat(context, 0, op.SatImm == 32 ? (int)(~0) : (1 << op.SatImm) - 1);
+ }
+
+ public static void Usat16(ArmEmitterContext context)
+ {
+ OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp;
+
+ EmitSat16(context, 0, (1 << op.SatImm) - 1);
+ }
+
+ public static void Usub8(ArmEmitterContext context)
+ {
+ EmitAddSub8(context, add: false, unsigned: true);
+ }
+
+ public static void Uxtb(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, false, 8);
+ }
+
+ public static void Uxtb16(ArmEmitterContext context)
+ {
+ EmitExtend16(context, false);
+ }
+
+ public static void Uxth(ArmEmitterContext context)
+ {
+ EmitSignExtend(context, false, 16);
+ }
+
+ private static void EmitSignExtend(ArmEmitterContext context, bool signed, int bits)
+ {
+ IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp;
+
+ Operand m = GetAluM(context);
+ Operand res;
+
+ if (op.RotateBits == 0)
+ {
+ res = m;
+ }
+ else
+ {
+ Operand rotate = Const(op.RotateBits);
+ res = context.RotateRight(m, rotate);
+ }
+
+ switch (bits)
+ {
+ case 8:
+ res = (signed) ? context.SignExtend8(OperandType.I32, res) : context.ZeroExtend8(OperandType.I32, res);
+ break;
+ case 16:
+ res = (signed) ? context.SignExtend16(OperandType.I32, res) : context.ZeroExtend16(OperandType.I32, res);
+ break;
+ }
+
+ if (op.Add)
+ {
+ res = context.Add(res, GetAluN(context));
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ private static void EmitExtend16(ArmEmitterContext context, bool signed)
+ {
+ IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp;
+
+ Operand m = GetAluM(context);
+ Operand res;
+
+ if (op.RotateBits == 0)
+ {
+ res = m;
+ }
+ else
+ {
+ Operand rotate = Const(op.RotateBits);
+ res = context.RotateRight(m, rotate);
+ }
+
+ Operand low16, high16;
+ if (signed)
+ {
+ low16 = context.SignExtend8(OperandType.I32, res);
+ high16 = context.SignExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16)));
+ }
+ else
+ {
+ low16 = context.ZeroExtend8(OperandType.I32, res);
+ high16 = context.ZeroExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16)));
+ }
+
+ if (op.Add)
+ {
+ Operand n = GetAluN(context);
+ Operand lowAdd, highAdd;
+ if (signed)
+ {
+ lowAdd = context.SignExtend16(OperandType.I32, n);
+ highAdd = context.SignExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16)));
+ }
+ else
+ {
+ lowAdd = context.ZeroExtend16(OperandType.I32, n);
+ highAdd = context.ZeroExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16)));
+ }
+
+ low16 = context.Add(low16, lowAdd);
+ high16 = context.Add(high16, highAdd);
+ }
+
+ res = context.BitwiseOr(
+ context.ZeroExtend16(OperandType.I32, low16),
+ context.ShiftLeft(context.ZeroExtend16(OperandType.I32, high16), Const(16)));
+
+ EmitAluStore(context, res);
+ }
+
+ private static void EmitDiv(ArmEmitterContext context, bool unsigned)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+ Operand zero = Const(m.Type, 0);
+
+ Operand divisorIsZero = context.ICompareEqual(m, zero);
+
+ Operand lblBadDiv = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBadDiv, divisorIsZero);
+
+ if (!unsigned)
+ {
+ // ARM64 behaviour: If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow).
+ // TODO: tests to ensure A32 works the same
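+                // e.g. Rn = 0x80000000, Rm = 0xFFFFFFFF: the true quotient 2^31 is not
+                // representable in 32 bits, so the result is forced to INT_MIN.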
+
+ Operand intMin = Const(int.MinValue);
+ Operand minus1 = Const(-1);
+
+ Operand nIsIntMin = context.ICompareEqual(n, intMin);
+ Operand mIsMinus1 = context.ICompareEqual(m, minus1);
+
+ Operand lblGoodDiv = Label();
+
+ context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1));
+
+ EmitAluStore(context, intMin);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblGoodDiv);
+ }
+
+ Operand res = unsigned
+ ? context.DivideUI(n, m)
+ : context.Divide(n, m);
+
+ EmitAluStore(context, res);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBadDiv);
+
+ EmitAluStore(context, zero);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitAddSub8(ArmEmitterContext context, bool add, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ Operand res = Const(0);
+
+ for (int byteSel = 0; byteSel < 4; byteSel++)
+ {
+ Operand shift = Const(byteSel * 8);
+
+ Operand nByte = context.ShiftRightUI(n, shift);
+ Operand mByte = context.ShiftRightUI(m, shift);
+
+ nByte = unsigned ? context.ZeroExtend8(OperandType.I32, nByte) : context.SignExtend8(OperandType.I32, nByte);
+ mByte = unsigned ? context.ZeroExtend8(OperandType.I32, mByte) : context.SignExtend8(OperandType.I32, mByte);
+
+ Operand resByte = add ? context.Add(nByte, mByte) : context.Subtract(nByte, mByte);
+
+ res = context.BitwiseOr(res, context.ShiftLeft(context.ZeroExtend8(OperandType.I32, resByte), shift));
+
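+            // For unsigned addition GE is the carry-out (bit 8 of the 9-bit sum);
+            // otherwise GE is set when the sign-extended result is non-negative.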
+ SetFlag(context, PState.GE0Flag + byteSel, unsigned && add
+ ? context.ShiftRightUI(resByte, Const(8))
+ : context.ShiftRightUI(context.BitwiseNot(resByte), Const(31)));
+ }
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ private static void EmitHadd8(ArmEmitterContext context, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand m = GetIntA32(context, op.Rm);
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand xor, res, carry;
+
+ // This relies on the equality x+y == ((x&y) << 1) + (x^y).
+ // Note that x^y always contains the LSB of the result.
+ // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
+ // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
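+            //
+            // Worked example on one lane: x = 0xFF, y = 0x01 gives x&y = 0x01 and
+            // (x^y)>>1 = 0x7F, so res = 0x01 + 0x7F = 0x80 = (0xFF + 0x01) / 2, with
+            // no carry escaping into the neighbouring byte.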
+
+ res = context.BitwiseAnd(m, n);
+ carry = context.BitwiseExclusiveOr(m, n);
+ xor = context.ShiftRightUI(carry, Const(1));
+ xor = context.BitwiseAnd(xor, Const(0x7F7F7F7Fu));
+ res = context.Add(res, xor);
+
+ if (!unsigned)
+ {
+ // Propagates the sign bit from (x^y)>>1 upwards by one.
+ carry = context.BitwiseAnd(carry, Const(0x80808080u));
+ res = context.BitwiseExclusiveOr(res, carry);
+ }
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ private static void EmitHsub8(ArmEmitterContext context, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand m = GetIntA32(context, op.Rm);
+ Operand n = GetIntA32(context, op.Rn);
+ Operand left, right, carry, res;
+
+ // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
+ // Note that x^y always contains the LSB of the result.
+        // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
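+        //
+        // Worked example on one lane: n = 5, m = 3 gives n^m = 6, (n^m)>>1 = 3 and
+        // (n^m)&m = 2, so res = 3 - 2 = 1 = (5 - 3) / 2.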
+
+ carry = context.BitwiseExclusiveOr(m, n);
+ left = context.ShiftRightUI(carry, Const(1));
+ right = context.BitwiseAnd(carry, m);
+
+ // We must now perform a partitioned subtraction.
+        // We can do this because the minuend is made of 7-bit fields.
+        // The spare top bit of each field is set so it can be borrowed from;
+        // inverting that bit at the end reveals whether a borrow occurred.
+
+ res = context.BitwiseOr(left, Const(0x80808080));
+ res = context.Subtract(res, right);
+ res = context.BitwiseExclusiveOr(res, Const(0x80808080));
+
+ if (!unsigned)
+ {
+ // We then sign extend the result into this bit.
+ carry = context.BitwiseAnd(carry, Const(0x80808080));
+ res = context.BitwiseExclusiveOr(res, carry);
+ }
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ private static void EmitSat(ArmEmitterContext context, int intMin, int intMax)
+ {
+ OpCode32Sat op = (OpCode32Sat)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+
+ int shift = DecodeImmShift(op.ShiftType, op.Imm5);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl:
+ if (shift == 32)
+ {
+ n = Const(0);
+ }
+ else
+ {
+ n = context.ShiftLeft(n, Const(shift));
+ }
+ break;
+ case ShiftType.Asr:
+ if (shift == 32)
+ {
+ n = context.ShiftRightSI(n, Const(31));
+ }
+ else
+ {
+ n = context.ShiftRightSI(n, Const(shift));
+ }
+ break;
+ }
+
+ Operand lblCheckLtIntMin = Label();
+ Operand lblNoSat = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfFalse(lblCheckLtIntMin, context.ICompareGreater(n, Const(intMax)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetIntA32(context, op.Rd, Const(intMax));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblCheckLtIntMin);
+ context.BranchIfFalse(lblNoSat, context.ICompareLess(n, Const(intMin)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetIntA32(context, op.Rd, Const(intMin));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNoSat);
+
+ SetIntA32(context, op.Rd, n);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitSat16(ArmEmitterContext context, int intMin, int intMax)
+ {
+ OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp;
+
+ void SetD(int part, Operand value)
+ {
+ if (part == 0)
+ {
+ SetIntA32(context, op.Rd, context.ZeroExtend16(OperandType.I32, value));
+ }
+ else
+ {
+ SetIntA32(context, op.Rd, context.BitwiseOr(GetIntA32(context, op.Rd), context.ShiftLeft(value, Const(16))));
+ }
+ }
+
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand nLow = context.SignExtend16(OperandType.I32, n);
+ Operand nHigh = context.ShiftRightSI(n, Const(16));
+
+ for (int part = 0; part < 2; part++)
+ {
+ Operand nPart = part == 0 ? nLow : nHigh;
+
+ Operand lblCheckLtIntMin = Label();
+ Operand lblNoSat = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfFalse(lblCheckLtIntMin, context.ICompareGreater(nPart, Const(intMax)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetD(part, Const(intMax));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblCheckLtIntMin);
+ context.BranchIfFalse(lblNoSat, context.ICompareLess(nPart, Const(intMin)));
+
+ SetFlag(context, PState.QFlag, Const(1));
+ SetD(part, Const(intMin));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNoSat);
+
+ SetD(part, nPart);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ private static void EmitSaturateRange(ArmEmitterContext context, Operand result, Operand value, uint saturateTo, bool unsigned, bool setQ = true)
+ {
+ Debug.Assert(saturateTo <= 32);
+ Debug.Assert(!unsigned || saturateTo < 32);
+
+ if (!unsigned && saturateTo == 32)
+ {
+ // No saturation possible for this case.
+
+ context.Copy(result, value);
+
+ return;
+ }
+ else if (saturateTo == 0)
+ {
+ // Result is always zero if we saturate 0 bits.
+
+ context.Copy(result, Const(0));
+
+ return;
+ }
+
+ Operand satValue;
+
+ if (unsigned)
+ {
+ // Negative values always saturate (to zero).
+ // So we must always ignore the sign bit when masking, so that the truncated value will differ from the original one.
+
+ satValue = context.BitwiseAnd(value, Const((int)(uint.MaxValue >> (32 - (int)saturateTo))));
+ }
+ else
+ {
+ satValue = context.ShiftLeft(value, Const(32 - (int)saturateTo));
+ satValue = context.ShiftRightSI(satValue, Const(32 - (int)saturateTo));
+ }
+
+ // If the result is 0, the values are equal and we don't need saturation.
+ Operand lblNoSat = Label();
+ context.BranchIfFalse(lblNoSat, context.Subtract(value, satValue));
+
+ // Saturate and set Q flag.
+ if (unsigned)
+ {
+ if (saturateTo == 31)
+ {
+                    // The only possible saturation case when going from 32-bit signed
+                    // to 32- or 31-bit unsigned is a negative input, since every
+                    // positive value is representable in a 31-bit range.
+
+ satValue = Const(0);
+ }
+ else
+ {
+ satValue = context.ShiftRightSI(value, Const(31));
+ satValue = context.BitwiseNot(satValue);
+ satValue = context.ShiftRightUI(satValue, Const(32 - (int)saturateTo));
+ }
+ }
+ else
+ {
+ if (saturateTo == 1)
+ {
+ satValue = context.ShiftRightSI(value, Const(31));
+ }
+ else
+ {
+ satValue = Const(uint.MaxValue >> (33 - (int)saturateTo));
+ satValue = context.BitwiseExclusiveOr(satValue, context.ShiftRightSI(value, Const(31)));
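+
+                    // e.g. saturateTo = 16: the base constant is 0x7FFF; XOR with the
+                    // sign mask yields 0x7FFF for positive inputs and 0xFFFF8000
+                    // (-32768) for negative ones.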
+ }
+ }
+
+ if (setQ)
+ {
+ SetFlag(context, PState.QFlag, Const(1));
+ }
+
+ context.Copy(result, satValue);
+
+ Operand lblExit = Label();
+ context.Branch(lblExit);
+
+ context.MarkLabel(lblNoSat);
+
+ context.Copy(result, value);
+
+ context.MarkLabel(lblExit);
+ }
+
+ private static void EmitSaturateUqadd(ArmEmitterContext context, Operand result, Operand value, uint saturateTo)
+ {
+ Debug.Assert(saturateTo <= 32);
+
+ if (saturateTo == 32)
+ {
+ // No saturation possible for this case.
+
+ context.Copy(result, value);
+
+ return;
+ }
+ else if (saturateTo == 0)
+ {
+ // Result is always zero if we saturate 0 bits.
+
+ context.Copy(result, Const(0));
+
+ return;
+ }
+
+            // If no bits above the saturation range are set, the addition did not
+            // overflow and no saturation is needed.
+ Operand lblNoSat = Label();
+ context.BranchIfFalse(lblNoSat, context.ShiftRightUI(value, Const((int)saturateTo)));
+
+ // Saturate.
+ context.Copy(result, Const(uint.MaxValue >> (32 - (int)saturateTo)));
+
+ Operand lblExit = Label();
+ context.Branch(lblExit);
+
+ context.MarkLabel(lblNoSat);
+
+ context.Copy(result, value);
+
+ context.MarkLabel(lblExit);
+ }
+
+ private static void EmitSaturateUqsub(ArmEmitterContext context, Operand result, Operand value, uint saturateTo)
+ {
+ Debug.Assert(saturateTo <= 32);
+
+ if (saturateTo == 32)
+ {
+ // No saturation possible for this case.
+
+ context.Copy(result, value);
+
+ return;
+ }
+ else if (saturateTo == 0)
+ {
+ // Result is always zero if we saturate 0 bits.
+
+ context.Copy(result, Const(0));
+
+ return;
+ }
+
+            // If the result is non-negative, the subtraction did not underflow and
+            // no saturation is needed.
+ Operand lblNoSat = Label();
+ context.BranchIf(lblNoSat, value, Const(0), Comparison.GreaterOrEqual);
+
+ // Saturate.
+ // Assumes that the value can only underflow, since this is only used for unsigned subtraction.
+ context.Copy(result, Const(0));
+
+ Operand lblExit = Label();
+ context.Branch(lblExit);
+
+ context.MarkLabel(lblNoSat);
+
+ context.Copy(result, value);
+
+ context.MarkLabel(lblExit);
+ }
+
+ private static Operand EmitSigned16BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action elementAction)
+ {
+ Operand tempD = context.AllocateLocal(OperandType.I32);
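+
+            // tempD receives each 16-bit element result in turn; the low half is
+            // captured into tempD2 before the high-half call overwrites tempD.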
+
+ Operand tempN = context.SignExtend16(OperandType.I32, rn);
+ Operand tempM = context.SignExtend16(OperandType.I32, rm);
+ elementAction(tempD, tempN, tempM);
+ Operand tempD2 = context.ZeroExtend16(OperandType.I32, tempD);
+
+ tempN = context.ShiftRightSI(rn, Const(16));
+ tempM = context.ShiftRightSI(rm, Const(16));
+ elementAction(tempD, tempN, tempM);
+ return context.BitwiseOr(tempD2, context.ShiftLeft(tempD, Const(16)));
+ }
+
+ private static Operand EmitUnsigned16BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action elementAction)
+ {
+ Operand tempD = context.AllocateLocal(OperandType.I32);
+
+ Operand tempN = context.ZeroExtend16(OperandType.I32, rn);
+ Operand tempM = context.ZeroExtend16(OperandType.I32, rm);
+ elementAction(tempD, tempN, tempM);
+ Operand tempD2 = context.ZeroExtend16(OperandType.I32, tempD);
+
+ tempN = context.ShiftRightUI(rn, Const(16));
+ tempM = context.ShiftRightUI(rm, Const(16));
+ elementAction(tempD, tempN, tempM);
+ return context.BitwiseOr(tempD2, context.ShiftLeft(tempD, Const(16)));
+ }
+
+ private static Operand EmitSigned8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action elementAction)
+ {
+ return Emit8BitPair(context, rn, rm, elementAction, unsigned: false);
+ }
+
+ private static Operand EmitUnsigned8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action elementAction)
+ {
+ return Emit8BitPair(context, rn, rm, elementAction, unsigned: true);
+ }
+
+ private static Operand Emit8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action elementAction, bool unsigned)
+ {
+ Operand tempD = context.AllocateLocal(OperandType.I32);
+ Operand result = default;
+
+ for (int b = 0; b < 4; b++)
+ {
+ Operand nByte = b != 0 ? context.ShiftRightUI(rn, Const(b * 8)) : rn;
+ Operand mByte = b != 0 ? context.ShiftRightUI(rm, Const(b * 8)) : rm;
+
+ if (unsigned)
+ {
+ nByte = context.ZeroExtend8(OperandType.I32, nByte);
+ mByte = context.ZeroExtend8(OperandType.I32, mByte);
+ }
+ else
+ {
+ nByte = context.SignExtend8(OperandType.I32, nByte);
+ mByte = context.SignExtend8(OperandType.I32, mByte);
+ }
+
+ elementAction(tempD, nByte, mByte);
+
+ if (b == 0)
+ {
+ result = context.ZeroExtend8(OperandType.I32, tempD);
+ }
+ else if (b < 3)
+ {
+ result = context.BitwiseOr(result, context.ShiftLeft(context.ZeroExtend8(OperandType.I32, tempD), Const(b * 8)));
+ }
+ else
+ {
+ result = context.BitwiseOr(result, context.ShiftLeft(tempD, Const(24)));
+ }
+ }
+
+ return result;
+ }
+
+ private static void EmitAluStore(ArmEmitterContext context, Operand value)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ EmitGenericAluStoreA32(context, op.Rd, ShouldSetFlags(context), value);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitAluHelper.cs b/src/ARMeilleure/Instructions/InstEmitAluHelper.cs
new file mode 100644
index 0000000..4d4a31f
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitAluHelper.cs
@@ -0,0 +1,652 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitAluHelper
+ {
+ public static bool ShouldSetFlags(ArmEmitterContext context)
+ {
+ IOpCode32HasSetFlags op = (IOpCode32HasSetFlags)context.CurrOp;
+
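+            // A null SetFlags defers to the IT state: the encoding sets flags only
+            // when outside an IT block. A non-null value states the choice explicitly.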
+ if (op.SetFlags == null)
+ {
+ return !context.IsInIfThenBlock;
+ }
+
+ return op.SetFlags.Value;
+ }
+
+ public static void EmitNZFlagsCheck(ArmEmitterContext context, Operand d)
+ {
+ SetFlag(context, PState.NFlag, context.ICompareLess(d, Const(d.Type, 0)));
+ SetFlag(context, PState.ZFlag, context.ICompareEqual(d, Const(d.Type, 0)));
+ }
+
+ public static void EmitAdcsCCheck(ArmEmitterContext context, Operand n, Operand d)
+ {
+ // C = (Rd == Rn && CIn) || Rd < Rn
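+            // e.g. n = 0xFFFFFFFF, m = 0, CIn = 1: d wraps to 0 and d < n, so C is
+            // set, matching the carry-out of the 33-bit sum.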
+ Operand cIn = GetFlag(PState.CFlag);
+
+ Operand cOut = context.BitwiseAnd(context.ICompareEqual(d, n), cIn);
+
+ cOut = context.BitwiseOr(cOut, context.ICompareLessUI(d, n));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ public static void EmitAddsCCheck(ArmEmitterContext context, Operand n, Operand d)
+ {
+ // C = Rd < Rn
+ SetFlag(context, PState.CFlag, context.ICompareLessUI(d, n));
+ }
+
+ public static void EmitAddsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+ {
+ // V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0
+ Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+ vOut = context.BitwiseAnd(vOut, context.BitwiseNot(context.BitwiseExclusiveOr(n, m)));
+
+ vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+ SetFlag(context, PState.VFlag, vOut);
+ }
+
+ public static void EmitSbcsCCheck(ArmEmitterContext context, Operand n, Operand m)
+ {
+ // C = (Rn == Rm && CIn) || Rn > Rm
+ Operand cIn = GetFlag(PState.CFlag);
+
+ Operand cOut = context.BitwiseAnd(context.ICompareEqual(n, m), cIn);
+
+ cOut = context.BitwiseOr(cOut, context.ICompareGreaterUI(n, m));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ public static void EmitSubsCCheck(ArmEmitterContext context, Operand n, Operand m)
+ {
+ // C = Rn >= Rm
+ SetFlag(context, PState.CFlag, context.ICompareGreaterOrEqualUI(n, m));
+ }
+
+ public static void EmitSubsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+ {
+ // V = (Rd ^ Rn) & (Rn ^ Rm) < 0
+ Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+ vOut = context.BitwiseAnd(vOut, context.BitwiseExclusiveOr(n, m));
+
+ vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+ SetFlag(context, PState.VFlag, vOut);
+ }
+
+ public static Operand EmitReverseBits32Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I32);
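+
+            // Bit-reversal ladder, 32-bit variant: swap adjacent bits, then pairs,
+            // nibbles and bytes, finishing with the halfword swap (5 stages).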
+
+ Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaau)), Const(1)),
+ context.ShiftLeft(context.BitwiseAnd(op, Const(0x55555555u)), Const(1)));
+
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccu)), Const(2)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x33333333u)), Const(2)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0u)), Const(4)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x0f0f0f0fu)), Const(4)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00u)), Const(8)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x00ff00ffu)), Const(8)));
+
+ return context.BitwiseOr(context.ShiftRightUI(val, Const(16)), context.ShiftLeft(val, Const(16)));
+ }
+
+ public static Operand EmitReverseBytes16_64Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
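+
+            // Swaps the two bytes inside each 16-bit lane, e.g.
+            // 0x0102030405060708 -> 0x0201040306050807.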
+
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xff00ff00ff00ff00ul)), Const(8)),
+ context.ShiftLeft(context.BitwiseAnd(op, Const(0x00ff00ff00ff00fful)), Const(8)));
+ }
+
+ public static Operand EmitReverseBytes16_32Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I32);
+
+ Operand val = EmitReverseBytes16_64Op(context, context.ZeroExtend32(OperandType.I64, op));
+
+ return context.ConvertI64ToI32(val);
+ }
+
+ private static void EmitAluWritePc(ArmEmitterContext context, Operand value)
+ {
+ Debug.Assert(value.Type == OperandType.I32);
+
+ if (((OpCode32)context.CurrOp).IsThumb)
+ {
+ bool isReturn = IsA32Return(context);
+ if (!isReturn)
+ {
+ context.StoreToContext();
+ }
+
+ InstEmitFlowHelper.EmitVirtualJump(context, value, isReturn);
+ }
+ else
+ {
+ EmitBxWritePc(context, value);
+ }
+ }
+
+ public static void EmitGenericAluStoreA32(ArmEmitterContext context, int rd, bool setFlags, Operand value)
+ {
+ Debug.Assert(value.Type == OperandType.I32);
+
+            if (rd == RegisterAlias.Aarch32Pc)
+ {
+ if (setFlags)
+ {
+ // TODO: Load SPSR etc.
+
+ EmitBxWritePc(context, value);
+ }
+ else
+ {
+ EmitAluWritePc(context, value);
+ }
+ }
+ else
+ {
+ SetIntA32(context, rd, value);
+ }
+ }
+
+ public static Operand GetAluN(ArmEmitterContext context)
+ {
+ if (context.CurrOp is IOpCodeAlu op)
+ {
+ if (op.DataOp == DataOp.Logical || op is IOpCodeAluRs)
+ {
+ return GetIntOrZR(context, op.Rn);
+ }
+ else
+ {
+ return GetIntOrSP(context, op.Rn);
+ }
+ }
+ else if (context.CurrOp is IOpCode32Alu op32)
+ {
+ return GetIntA32(context, op32.Rn);
+ }
+ else
+ {
+ throw InvalidOpCodeType(context.CurrOp);
+ }
+ }
+
+ public static Operand GetAluM(ArmEmitterContext context, bool setCarry = true)
+ {
+ switch (context.CurrOp)
+ {
+ // ARM32.
+ case IOpCode32AluImm op:
+ {
+ if (ShouldSetFlags(context) && op.IsRotated && setCarry)
+ {
+ SetFlag(context, PState.CFlag, Const((uint)op.Immediate >> 31));
+ }
+
+ return Const(op.Immediate);
+ }
+
+ case IOpCode32AluImm16 op:
+ return Const(op.Immediate);
+
+ case IOpCode32AluRsImm op:
+ return GetMShiftedByImmediate(context, op, setCarry);
+ case IOpCode32AluRsReg op:
+ return GetMShiftedByReg(context, op, setCarry);
+
+ case IOpCode32AluReg op:
+ return GetIntA32(context, op.Rm);
+
+ // ARM64.
+ case IOpCodeAluImm op:
+ {
+ if (op.GetOperandType() == OperandType.I32)
+ {
+ return Const((int)op.Immediate);
+ }
+ else
+ {
+ return Const(op.Immediate);
+ }
+ }
+
+ case IOpCodeAluRs op:
+ {
+ Operand value = GetIntOrZR(context, op.Rm);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl:
+ value = context.ShiftLeft(value, Const(op.Shift));
+ break;
+ case ShiftType.Lsr:
+ value = context.ShiftRightUI(value, Const(op.Shift));
+ break;
+ case ShiftType.Asr:
+ value = context.ShiftRightSI(value, Const(op.Shift));
+ break;
+ case ShiftType.Ror:
+ value = context.RotateRight(value, Const(op.Shift));
+ break;
+ }
+
+ return value;
+ }
+
+ case IOpCodeAluRx op:
+ {
+ Operand value = GetExtendedM(context, op.Rm, op.IntType);
+
+ value = context.ShiftLeft(value, Const(op.Shift));
+
+ return value;
+ }
+
+ default:
+ throw InvalidOpCodeType(context.CurrOp);
+ }
+ }
+
+ private static Exception InvalidOpCodeType(OpCode opCode)
+ {
+ return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\".");
+ }
+
+ // ARM32 helpers.
+ public static Operand GetMShiftedByImmediate(ArmEmitterContext context, IOpCode32AluRsImm op, bool setCarry)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+
+ int shift = op.Immediate;
+
+ if (shift == 0)
+ {
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsr:
+ shift = 32;
+ break;
+ case ShiftType.Asr:
+ shift = 32;
+ break;
+ case ShiftType.Ror:
+ shift = 1;
+ break;
+ }
+ }
+
+ if (shift != 0)
+ {
+ setCarry &= ShouldSetFlags(context);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl:
+ m = GetLslC(context, m, setCarry, shift);
+ break;
+ case ShiftType.Lsr:
+ m = GetLsrC(context, m, setCarry, shift);
+ break;
+ case ShiftType.Asr:
+ m = GetAsrC(context, m, setCarry, shift);
+ break;
+ case ShiftType.Ror:
+ if (op.Immediate != 0)
+ {
+ m = GetRorC(context, m, setCarry, shift);
+ }
+ else
+ {
+ m = GetRrxC(context, m, setCarry);
+ }
+ break;
+ }
+ }
+
+ return m;
+ }
+
+ public static int DecodeImmShift(ShiftType shiftType, int shift)
+ {
+ if (shift == 0)
+ {
+ switch (shiftType)
+ {
+ case ShiftType.Lsr:
+ shift = 32;
+ break;
+ case ShiftType.Asr:
+ shift = 32;
+ break;
+ case ShiftType.Ror:
+ shift = 1;
+ break;
+ }
+ }
+
+ return shift;
+ }
+
+ public static Operand GetMShiftedByReg(ArmEmitterContext context, IOpCode32AluRsReg op, bool setCarry)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+ Operand s = context.ZeroExtend8(OperandType.I32, GetIntA32(context, op.Rs));
+ Operand shiftIsZero = context.ICompareEqual(s, Const(0));
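+            // Only the bottom byte of Rs participates in the shift amount; a shift of
+            // zero leaves both m and the carry flag untouched.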
+
+ Operand zeroResult = m;
+ Operand shiftResult = m;
+
+ setCarry &= ShouldSetFlags(context);
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl:
+ shiftResult = EmitLslC(context, m, setCarry, s, shiftIsZero);
+ break;
+ case ShiftType.Lsr:
+ shiftResult = EmitLsrC(context, m, setCarry, s, shiftIsZero);
+ break;
+ case ShiftType.Asr:
+ shiftResult = EmitAsrC(context, m, setCarry, s, shiftIsZero);
+ break;
+ case ShiftType.Ror:
+ shiftResult = EmitRorC(context, m, setCarry, s, shiftIsZero);
+ break;
+ }
+
+ return context.ConditionalSelect(shiftIsZero, zeroResult, shiftResult);
+ }
+
+ public static void EmitIfHelper(ArmEmitterContext context, Operand boolValue, Action action, bool expected = true)
+ {
+ Debug.Assert(boolValue.Type == OperandType.I32);
+
+ Operand endLabel = Label();
+
+ if (expected)
+ {
+ context.BranchIfFalse(endLabel, boolValue);
+ }
+ else
+ {
+ context.BranchIfTrue(endLabel, boolValue);
+ }
+
+ action();
+
+ context.MarkLabel(endLabel);
+ }
+
+ public static Operand EmitLslC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32));
+ Operand result = context.ShiftLeft(m, shift);
+ if (setCarry)
+ {
+ EmitIfHelper(context, shiftIsZero, () =>
+ {
+ Operand cOut = context.ShiftRightUI(m, context.Subtract(Const(32), shift));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+ cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut);
+
+ SetFlag(context, PState.CFlag, cOut);
+ }, false);
+ }
+
+ return context.ConditionalSelect(shiftLarge, Const(0), result);
+ }
+
+ public static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ if ((uint)shift > 32)
+ {
+ return GetShiftByMoreThan32(context, setCarry);
+ }
+ else if (shift == 32)
+ {
+ if (setCarry)
+ {
+ SetCarryMLsb(context, m);
+ }
+
+ return Const(0);
+ }
+ else
+ {
+ if (setCarry)
+ {
+ Operand cOut = context.ShiftRightUI(m, Const(32 - shift));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+
+ return context.ShiftLeft(m, Const(shift));
+ }
+ }
+
+ public static Operand EmitLsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32));
+ Operand result = context.ShiftRightUI(m, shift);
+ if (setCarry)
+ {
+ EmitIfHelper(context, shiftIsZero, () =>
+ {
+ Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1)));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+ cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut);
+
+ SetFlag(context, PState.CFlag, cOut);
+ }, false);
+ }
+
+ return context.ConditionalSelect(shiftLarge, Const(0), result);
+ }
+
+ public static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ if ((uint)shift > 32)
+ {
+ return GetShiftByMoreThan32(context, setCarry);
+ }
+ else if (shift == 32)
+ {
+ if (setCarry)
+ {
+ SetCarryMMsb(context, m);
+ }
+
+ return Const(0);
+ }
+ else
+ {
+ if (setCarry)
+ {
+ SetCarryMShrOut(context, m, shift);
+ }
+
+ return context.ShiftRightUI(m, Const(shift));
+ }
+ }
+
+ private static Operand GetShiftByMoreThan32(ArmEmitterContext context, bool setCarry)
+ {
+ if (setCarry)
+ {
+ SetFlag(context, PState.CFlag, Const(0));
+ }
+
+ return Const(0);
+ }
+
+ public static Operand EmitAsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ Operand l32Result;
+ Operand ge32Result;
+
+ Operand less32 = context.ICompareLess(shift, Const(32));
+
+ ge32Result = context.ShiftRightSI(m, Const(31));
+
+ if (setCarry)
+ {
+ EmitIfHelper(context, context.BitwiseOr(less32, shiftIsZero), () =>
+ {
+ SetCarryMLsb(context, ge32Result);
+ }, false);
+ }
+
+ l32Result = context.ShiftRightSI(m, shift);
+ if (setCarry)
+ {
+ EmitIfHelper(context, context.BitwiseAnd(less32, context.BitwiseNot(shiftIsZero)), () =>
+ {
+ Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1)));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+
+ SetFlag(context, PState.CFlag, cOut);
+ });
+ }
+
+ return context.ConditionalSelect(less32, l32Result, ge32Result);
+ }
+
+ public static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ if ((uint)shift >= 32)
+ {
+ m = context.ShiftRightSI(m, Const(31));
+
+ if (setCarry)
+ {
+ SetCarryMLsb(context, m);
+ }
+
+ return m;
+ }
+ else
+ {
+ if (setCarry)
+ {
+ SetCarryMShrOut(context, m, shift);
+ }
+
+ return context.ShiftRightSI(m, Const(shift));
+ }
+ }
+
+ public static Operand EmitRorC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero)
+ {
+ Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32);
+
+ shift = context.BitwiseAnd(shift, Const(0x1f));
+ m = context.RotateRight(m, shift);
+
+ if (setCarry)
+ {
+ EmitIfHelper(context, shiftIsZero, () =>
+ {
+ SetCarryMMsb(context, m);
+ }, false);
+ }
+
+ return m;
+ }
+
+ public static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ shift &= 0x1f;
+
+ m = context.RotateRight(m, Const(shift));
+
+ if (setCarry)
+ {
+ SetCarryMMsb(context, m);
+ }
+
+ return m;
+ }
+
+ public static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ // Rotate right by 1 with carry.
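+        // The old carry becomes the new bit 31; when setCarry is true, the bit
+        // shifted out at position 0 becomes the new carry.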
+ Operand cIn = context.Copy(GetFlag(PState.CFlag));
+
+ if (setCarry)
+ {
+ SetCarryMLsb(context, m);
+ }
+
+ m = context.ShiftRightUI(m, Const(1));
+
+ m = context.BitwiseOr(m, context.ShiftLeft(cIn, Const(31)));
+
+ return m;
+ }
+
+ private static void SetCarryMLsb(ArmEmitterContext context, Operand m)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1)));
+ }
+
+ private static void SetCarryMMsb(ArmEmitterContext context, Operand m)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31)));
+ }
+
+ private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift)
+ {
+ Debug.Assert(m.Type == OperandType.I32);
+
+ Operand cOut = context.ShiftRightUI(m, Const(shift - 1));
+
+ cOut = context.BitwiseAnd(cOut, Const(1));
+
+ SetFlag(context, PState.CFlag, cOut);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitBfm.cs b/src/ARMeilleure/Instructions/InstEmitBfm.cs
new file mode 100644
index 0000000..aaf2287
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitBfm.cs
@@ -0,0 +1,196 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Bfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand d = GetIntOrZR(context, op.Rd);
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ Operand res;
+
+ if (op.Pos < op.Shift)
+ {
+ // BFI.
+ int shift = op.GetBitsCount() - op.Shift;
+
+ int width = op.Pos + 1;
+
+ long mask = (long)(ulong.MaxValue >> (64 - width));
+
+ res = context.ShiftLeft(context.BitwiseAnd(n, Const(n.Type, mask)), Const(shift));
+
+ res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~(mask << shift))));
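+
+                // e.g. on 64 bits with Shift = 56 and Pos = 7: width = 8, mask = 0xFF,
+                // shift = 8, so the low byte of Rn lands at bits [15:8] of Rd.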
+ }
+ else
+ {
+ // BFXIL.
+ int shift = op.Shift;
+
+ int width = op.Pos - shift + 1;
+
+ long mask = (long)(ulong.MaxValue >> (64 - width));
+
+ res = context.BitwiseAnd(context.ShiftRightUI(n, Const(shift)), Const(n.Type, mask));
+
+ res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~mask)));
+ }
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Sbfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ int bitsCount = op.GetBitsCount();
+
+ if (op.Pos + 1 == bitsCount)
+ {
+ EmitSbfmShift(context);
+ }
+ else if (op.Pos < op.Shift)
+ {
+ EmitSbfiz(context);
+ }
+ else if (op.Pos == 7 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend8(n.Type, n));
+ }
+ else if (op.Pos == 15 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend16(n.Type, n));
+ }
+ else if (op.Pos == 31 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.SignExtend32(n.Type, n));
+ }
+ else
+ {
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = context.ShiftLeft(res, Const(bitsCount - 1 - op.Pos));
+ res = context.ShiftRightSI(res, Const(bitsCount - 1));
+ res = context.BitwiseAnd(res, Const(res.Type, ~op.TMask));
+
+ Operand n2 = GetBfmN(context);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseOr(res, n2));
+ }
+ }
+
+ public static void Ubfm(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ if (op.Pos + 1 == op.GetBitsCount())
+ {
+ EmitUbfmShift(context);
+ }
+ else if (op.Pos < op.Shift)
+ {
+ EmitUbfiz(context);
+ }
+ else if (op.Pos + 1 == op.Shift)
+ {
+ EmitBfmLsl(context);
+ }
+ else if (op.Pos == 7 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xff)));
+ }
+ else if (op.Pos == 15 && op.Shift == 0)
+ {
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xffff)));
+ }
+ else
+ {
+ SetIntOrZR(context, op.Rd, GetBfmN(context));
+ }
+ }
+
+ private static void EmitSbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: true);
+ private static void EmitUbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: false);
+
+ private static void EmitBfiz(ArmEmitterContext context, bool signed)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ int width = op.Pos + 1;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = context.ShiftLeft(res, Const(op.GetBitsCount() - width));
+
+ res = signed
+ ? context.ShiftRightSI(res, Const(op.Shift - width))
+ : context.ShiftRightUI(res, Const(op.Shift - width));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitSbfmShift(ArmEmitterContext context)
+ {
+ EmitBfmShift(context, signed: true);
+ }
+
+ private static void EmitUbfmShift(ArmEmitterContext context)
+ {
+ EmitBfmShift(context, signed: false);
+ }
+
+ private static void EmitBfmShift(ArmEmitterContext context, bool signed)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = signed
+ ? context.ShiftRightSI(res, Const(op.Shift))
+ : context.ShiftRightUI(res, Const(op.Shift));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitBfmLsl(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ int shift = op.GetBitsCount() - op.Shift;
+
+ SetIntOrZR(context, op.Rd, context.ShiftLeft(res, Const(shift)));
+ }
+
+ private static Operand GetBfmN(ArmEmitterContext context)
+ {
+ OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ long mask = op.WMask & op.TMask;
+
+ return context.BitwiseAnd(context.RotateRight(res, Const(op.Shift)), Const(res.Type, mask));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitCcmp.cs b/src/ARMeilleure/Instructions/InstEmitCcmp.cs
new file mode 100644
index 0000000..a71fc26
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitCcmp.cs
@@ -0,0 +1,60 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Ccmn(ArmEmitterContext context) => EmitCcmp(context, isNegated: true);
+ public static void Ccmp(ArmEmitterContext context) => EmitCcmp(context, isNegated: false);
+
+ private static void EmitCcmp(ArmEmitterContext context, bool isNegated)
+ {
+ OpCodeCcmp op = (OpCodeCcmp)context.CurrOp;
+
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ EmitCondBranch(context, lblTrue, op.Cond);
+
+ SetFlag(context, PState.VFlag, Const((op.Nzcv >> 0) & 1));
+ SetFlag(context, PState.CFlag, Const((op.Nzcv >> 1) & 1));
+ SetFlag(context, PState.ZFlag, Const((op.Nzcv >> 2) & 1));
+ SetFlag(context, PState.NFlag, Const((op.Nzcv >> 3) & 1));
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ if (isNegated)
+ {
+ Operand d = context.Add(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitAddsCCheck(context, n, d);
+ EmitAddsVCheck(context, n, m, d);
+ }
+ else
+ {
+ Operand d = context.Subtract(n, m);
+
+ EmitNZFlagsCheck(context, d);
+
+ EmitSubsCCheck(context, n, m);
+ EmitSubsVCheck(context, n, m, d);
+ }
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitCsel.cs b/src/ARMeilleure/Instructions/InstEmitCsel.cs
new file mode 100644
index 0000000..1cd936b
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitCsel.cs
@@ -0,0 +1,52 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ private enum CselOperation
+ {
+ None,
+ Increment,
+ Invert,
+ Negate,
+ }
+
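+ // CSEL family: Rd = cond ? Rn : f(Rm), where f is the identity (CSEL),
+ // increment (CSINC), bitwise NOT (CSINV) or negation (CSNEG).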
+ public static void Csel(ArmEmitterContext context) => EmitCsel(context, CselOperation.None);
+ public static void Csinc(ArmEmitterContext context) => EmitCsel(context, CselOperation.Increment);
+ public static void Csinv(ArmEmitterContext context) => EmitCsel(context, CselOperation.Invert);
+ public static void Csneg(ArmEmitterContext context) => EmitCsel(context, CselOperation.Negate);
+
+ private static void EmitCsel(ArmEmitterContext context, CselOperation cselOp)
+ {
+ OpCodeCsel op = (OpCodeCsel)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ if (cselOp == CselOperation.Increment)
+ {
+ m = context.Add(m, Const(m.Type, 1));
+ }
+ else if (cselOp == CselOperation.Invert)
+ {
+ m = context.BitwiseNot(m);
+ }
+ else if (cselOp == CselOperation.Negate)
+ {
+ m = context.Negate(m);
+ }
+
+ Operand condTrue = GetCondTrue(context, op.Cond);
+
+ Operand d = context.ConditionalSelect(condTrue, n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitDiv.cs b/src/ARMeilleure/Instructions/InstEmitDiv.cs
new file mode 100644
index 0000000..728462e
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitDiv.cs
@@ -0,0 +1,66 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Sdiv(ArmEmitterContext context) => EmitDiv(context, unsigned: false);
+ public static void Udiv(ArmEmitterContext context) => EmitDiv(context, unsigned: true);
+
+ private static void EmitDiv(ArmEmitterContext context, bool unsigned)
+ {
+ OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
+
+ // If Rm == 0, Rd = 0 (division by zero).
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand divisorIsZero = context.ICompareEqual(m, Const(m.Type, 0));
+
+ Operand lblBadDiv = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBadDiv, divisorIsZero);
+
+ if (!unsigned)
+ {
+ // If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow).
+ bool is32Bits = op.RegisterSize == RegisterSize.Int32;
+
+ Operand intMin = is32Bits ? Const(int.MinValue) : Const(long.MinValue);
+ Operand minus1 = is32Bits ? Const(-1) : Const(-1L);
+
+ Operand nIsIntMin = context.ICompareEqual(n, intMin);
+ Operand mIsMinus1 = context.ICompareEqual(m, minus1);
+
+ Operand lblGoodDiv = Label();
+
+ context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1));
+
+ SetAluDOrZR(context, intMin);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblGoodDiv);
+ }
+
+ Operand d = unsigned
+ ? context.DivideUI(n, m)
+ : context.Divide(n, m);
+
+ SetAluDOrZR(context, d);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBadDiv);
+
+ SetAluDOrZR(context, Const(op.GetOperandType(), 0));
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitException.cs b/src/ARMeilleure/Instructions/InstEmitException.cs
new file mode 100644
index 0000000..d30fb2f
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitException.cs
@@ -0,0 +1,55 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Brk(ArmEmitterContext context)
+ {
+ OpCodeException op = (OpCodeException)context.CurrOp;
+
+ string name = nameof(NativeInterface.Break);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ context.Return(Const(op.Address));
+ }
+
+ public static void Svc(ArmEmitterContext context)
+ {
+ OpCodeException op = (OpCodeException)context.CurrOp;
+
+ string name = nameof(NativeInterface.SupervisorCall);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ Translator.EmitSynchronization(context);
+ }
+
+ public static void Und(ArmEmitterContext context)
+ {
+ OpCode op = context.CurrOp;
+
+ string name = nameof(NativeInterface.Undefined);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.RawOpCode));
+
+ context.LoadFromContext();
+
+ context.Return(Const(op.Address));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitException32.cs b/src/ARMeilleure/Instructions/InstEmitException32.cs
new file mode 100644
index 0000000..57af152
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitException32.cs
@@ -0,0 +1,39 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Translation;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Svc(ArmEmitterContext context)
+ {
+ IOpCode32Exception op = (IOpCode32Exception)context.CurrOp;
+
+ string name = nameof(NativeInterface.SupervisorCall);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(((IOpCode)op).Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ Translator.EmitSynchronization(context);
+ }
+
+ public static void Trap(ArmEmitterContext context)
+ {
+ IOpCode32Exception op = (IOpCode32Exception)context.CurrOp;
+
+ string name = nameof(NativeInterface.Break);
+
+ context.StoreToContext();
+
+ context.Call(typeof(NativeInterface).GetMethod(name), Const(((IOpCode)op).Address), Const(op.Id));
+
+ context.LoadFromContext();
+
+ context.Return(Const(context.CurrOp.Address));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitFlow.cs b/src/ARMeilleure/Instructions/InstEmitFlow.cs
new file mode 100644
index 0000000..a986bf6
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitFlow.cs
@@ -0,0 +1,107 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void B(ArmEmitterContext context)
+ {
+ OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
+
+ context.Branch(context.GetLabel((ulong)op.Immediate));
+ }
+
+ public static void B_Cond(ArmEmitterContext context)
+ {
+ OpCodeBImmCond op = (OpCodeBImmCond)context.CurrOp;
+
+ EmitBranch(context, op.Cond);
+ }
+
+ public static void Bl(ArmEmitterContext context)
+ {
+ OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
+
+ context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4));
+
+ EmitCall(context, (ulong)op.Immediate);
+ }
+
+ public static void Blr(ArmEmitterContext context)
+ {
+ OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+ Operand n = context.Copy(GetIntOrZR(context, op.Rn));
+
+ context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4));
+
+ EmitVirtualCall(context, n);
+ }
+
+ public static void Br(ArmEmitterContext context)
+ {
+ OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+ EmitVirtualJump(context, GetIntOrZR(context, op.Rn), op.Rn == RegisterAlias.Lr);
+ }
+
+ public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true);
+ public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false);
+
+ private static void EmitCb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeBImmCmp op = (OpCodeBImmCmp)context.CurrOp;
+
+ EmitBranch(context, GetIntOrZR(context, op.Rt), onNotZero);
+ }
+
+ public static void Ret(ArmEmitterContext context)
+ {
+ OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+ context.Return(GetIntOrZR(context, op.Rn));
+ }
+
+ public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: true);
+ public static void Tbz(ArmEmitterContext context) => EmitTb(context, onNotZero: false);
+
+ private static void EmitTb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeBImmTest op = (OpCodeBImmTest)context.CurrOp;
+
+ Operand value = context.BitwiseAnd(GetIntOrZR(context, op.Rt), Const(1L << op.Bit));
+
+ EmitBranch(context, value, onNotZero);
+ }
+
+ private static void EmitBranch(ArmEmitterContext context, Condition cond)
+ {
+ OpCodeBImm op = (OpCodeBImm)context.CurrOp;
+
+ EmitCondBranch(context, context.GetLabel((ulong)op.Immediate), cond);
+ }
+
+ private static void EmitBranch(ArmEmitterContext context, Operand value, bool onNotZero)
+ {
+ OpCodeBImm op = (OpCodeBImm)context.CurrOp;
+
+ Operand lblTarget = context.GetLabel((ulong)op.Immediate);
+
+ if (onNotZero)
+ {
+ context.BranchIfTrue(lblTarget, value);
+ }
+ else
+ {
+ context.BranchIfFalse(lblTarget, value);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitFlow32.cs b/src/ARMeilleure/Instructions/InstEmitFlow32.cs
new file mode 100644
index 0000000..289d3f4
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitFlow32.cs
@@ -0,0 +1,136 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void B(ArmEmitterContext context)
+ {
+ IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
+
+ context.Branch(context.GetLabel((ulong)op.Immediate));
+ }
+
+ public static void Bl(ArmEmitterContext context)
+ {
+ Blx(context, x: false);
+ }
+
+ public static void Blx(ArmEmitterContext context)
+ {
+ Blx(context, x: true);
+ }
+
+ private static void Blx(ArmEmitterContext context, bool x)
+ {
+ IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
+
+ uint pc = op.GetPc();
+
+ bool isThumb = ((OpCode32)context.CurrOp).IsThumb;
+
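+ // Set LR to the return address. In Thumb mode, bit 0 of the stored value is
+ // set so that a later BX/BLX returns to Thumb. Note that GetPc() reads as the
+ // current instruction address plus 4 (Thumb) or plus 8 (Arm).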
+ uint currentPc = isThumb
+ ? pc | 1
+ : pc - 4;
+
+ SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc));
+
+ // If x is true, then this is a branch with link and exchange.
+ // In this case we need to switch between the Arm and Thumb instruction sets.
+ if (x)
+ {
+ SetFlag(context, PState.TFlag, Const(isThumb ? 0 : 1));
+ }
+
+ EmitCall(context, (ulong)op.Immediate);
+ }
+
+ public static void Blxr(ArmEmitterContext context)
+ {
+ IOpCode32BReg op = (IOpCode32BReg)context.CurrOp;
+
+ uint pc = op.GetPc();
+
+ Operand addr = context.Copy(GetIntA32(context, op.Rm));
+ Operand bitOne = context.BitwiseAnd(addr, Const(1));
+
+ bool isThumb = ((OpCode32)context.CurrOp).IsThumb;
+
+ uint currentPc = isThumb
+ ? (pc - 2) | 1
+ : pc - 4;
+
+ SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc));
+
+ SetFlag(context, PState.TFlag, bitOne);
+
+ EmitBxWritePc(context, addr);
+ }
+
+ public static void Bx(ArmEmitterContext context)
+ {
+ IOpCode32BReg op = (IOpCode32BReg)context.CurrOp;
+
+ EmitBxWritePc(context, GetIntA32(context, op.Rm), op.Rm);
+ }
+
+ public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true);
+ public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false);
+
+ private static void EmitCb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeT16BImmCmp op = (OpCodeT16BImmCmp)context.CurrOp;
+
+ Operand value = GetIntA32(context, op.Rn);
+ Operand lblTarget = context.GetLabel((ulong)op.Immediate);
+
+ if (onNotZero)
+ {
+ context.BranchIfTrue(lblTarget, value);
+ }
+ else
+ {
+ context.BranchIfFalse(lblTarget, value);
+ }
+ }
+
+ public static void It(ArmEmitterContext context)
+ {
+ OpCodeT16IfThen op = (OpCodeT16IfThen)context.CurrOp;
+
+ context.SetIfThenBlockState(op.IfThenBlockConds);
+ }
+
+ public static void Tbb(ArmEmitterContext context) => EmitTb(context, halfword: false);
+ public static void Tbh(ArmEmitterContext context) => EmitTb(context, halfword: true);
+
+ private static void EmitTb(ArmEmitterContext context, bool halfword)
+ {
+ OpCodeT32Tb op = (OpCodeT32Tb)context.CurrOp;
+
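+ // TBB/TBH: load an unsigned byte (TBB) or halfword (TBH) offset from the table
+ // at Rn + Rm (Rm scaled by 2 for TBH), then branch to PC + 2 * offset.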
+ Operand halfwords;
+
+ if (halfword)
+ {
+ Operand address = context.Add(GetIntA32(context, op.Rn), context.ShiftLeft(GetIntA32(context, op.Rm), Const(1)));
+ halfwords = InstEmitMemoryHelper.EmitReadInt(context, address, 1);
+ }
+ else
+ {
+ Operand address = context.Add(GetIntA32(context, op.Rn), GetIntA32(context, op.Rm));
+ halfwords = InstEmitMemoryHelper.EmitReadIntAligned(context, address, 0);
+ }
+
+ Operand targetAddress = context.Add(Const((int)op.GetPc()), context.ShiftLeft(halfwords, Const(1)));
+
+ EmitVirtualJump(context, targetAddress, isReturn: false);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs
new file mode 100644
index 0000000..2009baf
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs
@@ -0,0 +1,240 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using ARMeilleure.Translation.PTC;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitFlowHelper
+ {
+ public static void EmitCondBranch(ArmEmitterContext context, Operand target, Condition cond)
+ {
+ if (cond != Condition.Al)
+ {
+ context.BranchIfTrue(target, GetCondTrue(context, cond));
+ }
+ else
+ {
+ context.Branch(target);
+ }
+ }
+
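+ // Lowers an ARM condition code to a boolean operand computed from the NZCV
+ // flags, forwarding the result of a prior comparison when one is available.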
+ public static Operand GetCondTrue(ArmEmitterContext context, Condition condition)
+ {
+ Operand cmpResult = context.TryGetComparisonResult(condition);
+
+ if (cmpResult != default)
+ {
+ return cmpResult;
+ }
+
+ Operand value = Const(1);
+
+ Operand Inverse(Operand val)
+ {
+ return context.BitwiseExclusiveOr(val, Const(1));
+ }
+
+ switch (condition)
+ {
+ case Condition.Eq:
+ value = GetFlag(PState.ZFlag);
+ break;
+
+ case Condition.Ne:
+ value = Inverse(GetFlag(PState.ZFlag));
+ break;
+
+ case Condition.GeUn:
+ value = GetFlag(PState.CFlag);
+ break;
+
+ case Condition.LtUn:
+ value = Inverse(GetFlag(PState.CFlag));
+ break;
+
+ case Condition.Mi:
+ value = GetFlag(PState.NFlag);
+ break;
+
+ case Condition.Pl:
+ value = Inverse(GetFlag(PState.NFlag));
+ break;
+
+ case Condition.Vs:
+ value = GetFlag(PState.VFlag);
+ break;
+
+ case Condition.Vc:
+ value = Inverse(GetFlag(PState.VFlag));
+ break;
+
+ case Condition.GtUn:
+ {
+ Operand c = GetFlag(PState.CFlag);
+ Operand z = GetFlag(PState.ZFlag);
+
+ value = context.BitwiseAnd(c, Inverse(z));
+
+ break;
+ }
+
+ case Condition.LeUn:
+ {
+ Operand c = GetFlag(PState.CFlag);
+ Operand z = GetFlag(PState.ZFlag);
+
+ value = context.BitwiseOr(Inverse(c), z);
+
+ break;
+ }
+
+ case Condition.Ge:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.ICompareEqual(n, v);
+
+ break;
+ }
+
+ case Condition.Lt:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.ICompareNotEqual(n, v);
+
+ break;
+ }
+
+ case Condition.Gt:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand z = GetFlag(PState.ZFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.BitwiseAnd(Inverse(z), context.ICompareEqual(n, v));
+
+ break;
+ }
+
+ case Condition.Le:
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand z = GetFlag(PState.ZFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.BitwiseOr(z, context.ICompareNotEqual(n, v));
+
+ break;
+ }
+ }
+
+ return value;
+ }
+
+ public static void EmitCall(ArmEmitterContext context, ulong immediate)
+ {
+ bool isRecursive = immediate == context.EntryAddress;
+
+ if (isRecursive)
+ {
+ context.Branch(context.GetLabel(immediate));
+ }
+ else
+ {
+ EmitTableBranch(context, Const(immediate), isJump: false);
+ }
+ }
+
+ public static void EmitVirtualCall(ArmEmitterContext context, Operand target)
+ {
+ EmitTableBranch(context, target, isJump: false);
+ }
+
+ public static void EmitVirtualJump(ArmEmitterContext context, Operand target, bool isReturn)
+ {
+ if (isReturn)
+ {
+ if (target.Type == OperandType.I32)
+ {
+ target = context.ZeroExtend32(OperandType.I64, target);
+ }
+
+ context.Return(target);
+ }
+ else
+ {
+ EmitTableBranch(context, target, isJump: true);
+ }
+ }
+
+ private static void EmitTableBranch(ArmEmitterContext context, Operand guestAddress, bool isJump)
+ {
+ context.StoreToContext();
+
+ if (guestAddress.Type == OperandType.I32)
+ {
+ guestAddress = context.ZeroExtend32(OperandType.I64, guestAddress);
+ }
+
+ // Store the target guest address into the native context. The stub uses this address to dispatch into
+ // the next translation.
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+ Operand dispAddressAddr = context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset()));
+ context.Store(dispAddressAddr, guestAddress);
+
+ Operand hostAddress;
+
+ // If the address is mapped onto the function table, we can skip the table walk. Otherwise we fall back
+ // to the dispatch stub.
+ if (guestAddress.Kind == OperandKind.Constant && context.FunctionTable.IsValid(guestAddress.Value))
+ {
+ Operand hostAddressAddr = !context.HasPtc ?
+ Const(ref context.FunctionTable.GetValue(guestAddress.Value)) :
+ Const(ref context.FunctionTable.GetValue(guestAddress.Value), new Symbol(SymbolType.FunctionTable, guestAddress.Value));
+
+ hostAddress = context.Load(OperandType.I64, hostAddressAddr);
+ }
+ else
+ {
+ hostAddress = !context.HasPtc ?
+ Const((long)context.Stubs.DispatchStub) :
+ Const((long)context.Stubs.DispatchStub, Ptc.DispatchStubSymbol);
+ }
+
+ if (isJump)
+ {
+ context.Tailcall(hostAddress, nativeContext);
+ }
+ else
+ {
+ OpCode op = context.CurrOp;
+
+ Operand returnAddress = context.Call(hostAddress, OperandType.I64, nativeContext);
+
+ context.LoadFromContext();
+
+ // Note: The return value of a translated function is always an Int64 with the address execution has
+ // returned to. We expect this address to be immediately after the current instruction; if it isn't, we
+ // keep returning until we reach the dispatcher.
+ Operand nextAddr = Const((long)op.Address + op.OpCodeSizeInBytes);
+
+ // Try to continue within this block.
+ // If the return address isn't our next instruction, we need to return so the JIT can figure out
+ // what to do.
+ Operand lblContinue = context.GetLabel(nextAddr.Value);
+ context.BranchIf(lblContinue, returnAddress, nextAddr, Comparison.Equal, BasicBlockFrequency.Cold);
+
+ context.Return(returnAddress);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHash.cs b/src/ARMeilleure/Instructions/InstEmitHash.cs
new file mode 100644
index 0000000..82b3e35
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHash.cs
@@ -0,0 +1,69 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHashHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ private const int ByteSizeLog2 = 0;
+ private const int HWordSizeLog2 = 1;
+ private const int WordSizeLog2 = 2;
+ private const int DWordSizeLog2 = 3;
+
+ public static void Crc32b(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, false);
+ }
+
+ public static void Crc32h(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, false);
+ }
+
+ public static void Crc32w(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, false);
+ }
+
+ public static void Crc32x(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, DWordSizeLog2, false);
+ }
+
+ public static void Crc32cb(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, true);
+ }
+
+ public static void Crc32ch(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, true);
+ }
+
+ public static void Crc32cw(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, true);
+ }
+
+ public static void Crc32cx(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, DWordSizeLog2, true);
+ }
+
+ private static void EmitCrc32Call(ArmEmitterContext context, int size, bool c)
+ {
+ OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = EmitCrc32(context, n, m, size, c);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHash32.cs b/src/ARMeilleure/Instructions/InstEmitHash32.cs
new file mode 100644
index 0000000..30c893a
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHash32.cs
@@ -0,0 +1,53 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using static ARMeilleure.Instructions.InstEmitHashHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Crc32b(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, false);
+ }
+
+ public static void Crc32h(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, false);
+ }
+
+ public static void Crc32w(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, false);
+ }
+
+ public static void Crc32cb(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, ByteSizeLog2, true);
+ }
+
+ public static void Crc32ch(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, HWordSizeLog2, true);
+ }
+
+ public static void Crc32cw(ArmEmitterContext context)
+ {
+ EmitCrc32Call(context, WordSizeLog2, true);
+ }
+
+ private static void EmitCrc32Call(ArmEmitterContext context, int size, bool c)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ Operand d = EmitCrc32(context, n, m, size, c);
+
+ EmitAluStore(context, d);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHashHelper.cs b/src/ARMeilleure/Instructions/InstEmitHashHelper.cs
new file mode 100644
index 0000000..9b1ad87
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHashHelper.cs
@@ -0,0 +1,124 @@
+// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitHashHelper
+ {
+ public const uint Crc32RevPoly = 0xedb88320;
+ public const uint Crc32cRevPoly = 0x82f63b78;
+
+ public static Operand EmitCrc32(ArmEmitterContext context, Operand crc, Operand value, int size, bool castagnoli)
+ {
+ Debug.Assert(crc.Type.IsInteger() && value.Type.IsInteger());
+ Debug.Assert(size >= 0 && size < 4);
+ Debug.Assert((size < 3) || (value.Type == OperandType.I64));
+
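+ // Three strategies, from fastest to slowest: the SSE4.2 CRC32 instruction
+ // (Castagnoli polynomial only), a PCLMULQDQ carry-less multiply reduction,
+ // and a software fallback.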
+ if (castagnoli && Optimizations.UseSse42)
+ {
+ // The CRC32 instruction does not have an immediate variant, so ensure both inputs are in registers.
+ value = (value.Kind == OperandKind.Constant) ? context.Copy(value) : value;
+ crc = (crc.Kind == OperandKind.Constant) ? context.Copy(crc) : crc;
+
+ Intrinsic op = size switch
+ {
+ 0 => Intrinsic.X86Crc32_8,
+ 1 => Intrinsic.X86Crc32_16,
+ _ => Intrinsic.X86Crc32,
+ };
+
+ return (size == 3) ? context.ConvertI64ToI32(context.AddIntrinsicLong(op, crc, value)) : context.AddIntrinsicInt(op, crc, value);
+ }
+ else if (Optimizations.UsePclmulqdq)
+ {
+ return size switch
+ {
+ 3 => EmitCrc32Optimized64(context, crc, value, castagnoli),
+ _ => EmitCrc32Optimized(context, crc, value, castagnoli, size),
+ };
+ }
+ else
+ {
+ string name = (size, castagnoli) switch
+ {
+ (0, false) => nameof(SoftFallback.Crc32b),
+ (1, false) => nameof(SoftFallback.Crc32h),
+ (2, false) => nameof(SoftFallback.Crc32w),
+ (3, false) => nameof(SoftFallback.Crc32x),
+ (0, true) => nameof(SoftFallback.Crc32cb),
+ (1, true) => nameof(SoftFallback.Crc32ch),
+ (2, true) => nameof(SoftFallback.Crc32cw),
+ (3, true) => nameof(SoftFallback.Crc32cx),
+ _ => throw new ArgumentOutOfRangeException(nameof(size)),
+ };
+
+ return context.Call(typeof(SoftFallback).GetMethod(name), crc, value);
+ }
+ }
+
+ private static Operand EmitCrc32Optimized(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli, int size)
+ {
+ long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
+ long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1
+
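+ // Barrett reduction, as described in the Intel paper linked above: a carry-less
+ // multiply by mu' estimates the quotient of the division by P(x), and a second
+ // multiply by P'(x) recovers the remainder, which is the new CRC.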
+ crc = context.VectorInsert(context.VectorZero(), crc, 0);
+
+ switch (size)
+ {
+ case 0:
+ data = context.VectorInsert8(context.VectorZero(), data, 0);
+ break;
+ case 1:
+ data = context.VectorInsert16(context.VectorZero(), data, 0);
+ break;
+ case 2:
+ data = context.VectorInsert(context.VectorZero(), data, 0);
+ break;
+ }
+
+ int bitsize = 8 << size;
+
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
+ tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(64 - bitsize));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(0));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
+
+ if (bitsize < 32)
+ {
+ crc = context.AddIntrinsic(Intrinsic.X86Pslldq, crc, Const((64 - bitsize) / 8));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, crc);
+ }
+
+ return context.VectorExtract(OperandType.I32, tmp, 2);
+ }
+
+ private static Operand EmitCrc32Optimized64(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli)
+ {
+ long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
+ long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1
+
+ crc = context.VectorInsert(context.VectorZero(), crc, 0);
+ data = context.VectorInsert(context.VectorZero(), data, 0);
+
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pslldq, tmp, Const(4));
+
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, res, X86GetScalar(context, mu), Const(0));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
+
+ tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, res);
+ tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(32));
+
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(1));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
+
+ return context.VectorExtract(OperandType.I32, tmp, 2);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitHelper.cs b/src/ARMeilleure/Instructions/InstEmitHelper.cs
new file mode 100644
index 0000000..7a515f9
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitHelper.cs
@@ -0,0 +1,249 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitHelper
+ {
+ public static Operand GetExtendedM(ArmEmitterContext context, int rm, IntType type)
+ {
+ Operand value = GetIntOrZR(context, rm);
+
+ switch (type)
+ {
+ case IntType.UInt8:
+ value = context.ZeroExtend8(value.Type, value);
+ break;
+ case IntType.UInt16:
+ value = context.ZeroExtend16(value.Type, value);
+ break;
+ case IntType.UInt32:
+ value = context.ZeroExtend32(value.Type, value);
+ break;
+
+ case IntType.Int8:
+ value = context.SignExtend8(value.Type, value);
+ break;
+ case IntType.Int16:
+ value = context.SignExtend16(value.Type, value);
+ break;
+ case IntType.Int32:
+ value = context.SignExtend32(value.Type, value);
+ break;
+ }
+
+ return value;
+ }
+
+ public static Operand GetIntA32(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex == RegisterAlias.Aarch32Pc)
+ {
+ OpCode32 op = (OpCode32)context.CurrOp;
+
+ return Const((int)op.GetPc());
+ }
+ else
+ {
+ return Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32);
+ }
+ }
+
+ public static Operand GetIntA32AlignedPC(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex == RegisterAlias.Aarch32Pc)
+ {
+ OpCode32 op = (OpCode32)context.CurrOp;
+
+ return Const((int)(op.GetPc() & 0xfffffffc));
+ }
+ else
+ {
+ return Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32);
+ }
+ }
+
+ public static Operand GetVecA32(int regIndex)
+ {
+ return Register(regIndex, RegisterType.Vector, OperandType.V128);
+ }
+
+ public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ if (regIndex == RegisterAlias.Aarch32Pc)
+ {
+ if (!IsA32Return(context))
+ {
+ context.StoreToContext();
+ }
+
+ EmitBxWritePc(context, value);
+ }
+ else
+ {
+ if (value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+ Operand reg = Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32);
+
+ context.Copy(reg, value);
+ }
+ }
+
+ public static int GetRegisterAlias(Aarch32Mode mode, int regIndex)
+ {
+ // Only registers >= 8 are banked,
+ // with registers in the range [8, 12] being
+ // banked for the FIQ mode, and registers
+ // 13 and 14 being banked for all modes.
+ if ((uint)regIndex < 8)
+ {
+ return regIndex;
+ }
+
+ return GetBankedRegisterAlias(mode, regIndex);
+ }
+
+ public static int GetBankedRegisterAlias(Aarch32Mode mode, int regIndex)
+ {
+ return regIndex switch
+ {
+#pragma warning disable IDE0055 // Disable formatting
+ 8 => mode == Aarch32Mode.Fiq ? RegisterAlias.R8Fiq : RegisterAlias.R8Usr,
+ 9 => mode == Aarch32Mode.Fiq ? RegisterAlias.R9Fiq : RegisterAlias.R9Usr,
+ 10 => mode == Aarch32Mode.Fiq ? RegisterAlias.R10Fiq : RegisterAlias.R10Usr,
+ 11 => mode == Aarch32Mode.Fiq ? RegisterAlias.R11Fiq : RegisterAlias.R11Usr,
+ 12 => mode == Aarch32Mode.Fiq ? RegisterAlias.R12Fiq : RegisterAlias.R12Usr,
+ 13 => mode switch
+ {
+ Aarch32Mode.User or Aarch32Mode.System => RegisterAlias.SpUsr,
+ Aarch32Mode.Fiq => RegisterAlias.SpFiq,
+ Aarch32Mode.Irq => RegisterAlias.SpIrq,
+ Aarch32Mode.Supervisor => RegisterAlias.SpSvc,
+ Aarch32Mode.Abort => RegisterAlias.SpAbt,
+ Aarch32Mode.Hypervisor => RegisterAlias.SpHyp,
+ Aarch32Mode.Undefined => RegisterAlias.SpUnd,
+ _ => throw new ArgumentException($"No such AArch32Mode: {mode}", nameof(mode)),
+ },
+ 14 => mode switch
+ {
+ Aarch32Mode.User or Aarch32Mode.Hypervisor or Aarch32Mode.System => RegisterAlias.LrUsr,
+ Aarch32Mode.Fiq => RegisterAlias.LrFiq,
+ Aarch32Mode.Irq => RegisterAlias.LrIrq,
+ Aarch32Mode.Supervisor => RegisterAlias.LrSvc,
+ Aarch32Mode.Abort => RegisterAlias.LrAbt,
+ Aarch32Mode.Undefined => RegisterAlias.LrUnd,
+ _ => throw new ArgumentException($"No such AArch32Mode: {mode}", nameof(mode)),
+ },
+ _ => throw new ArgumentOutOfRangeException(nameof(regIndex), regIndex, null),
+#pragma warning restore IDE0055
+ };
+ }
+
+ public static bool IsA32Return(ArmEmitterContext context)
+ {
+ return context.CurrOp switch
+ {
+ IOpCode32MemMult => true, // Setting PC using LDM is nearly always a return.
+ OpCode32AluRsImm op => op.Rm == RegisterAlias.Aarch32Lr,
+ OpCode32AluRsReg op => op.Rm == RegisterAlias.Aarch32Lr,
+ OpCode32AluReg op => op.Rm == RegisterAlias.Aarch32Lr,
+ OpCode32Mem op => op.Rn == RegisterAlias.Aarch32Sp && op.WBack && !op.Index, // Setting PC to an address stored on the stack is nearly always a return.
+ _ => false,
+ };
+ }
+
+ public static void EmitBxWritePc(ArmEmitterContext context, Operand pc, int sourceRegister = 0)
+ {
+ bool isReturn = sourceRegister == RegisterAlias.Aarch32Lr || IsA32Return(context);
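+ // Bit 0 of the target address selects the new instruction set state: 1 for
+ // Thumb, 0 for Arm. The address is then aligned accordingly, clearing one
+ // bit for Thumb or two bits for Arm.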
+ Operand mode = context.BitwiseAnd(pc, Const(1));
+
+ SetFlag(context, PState.TFlag, mode);
+
+ Operand addr = context.ConditionalSelect(mode, context.BitwiseAnd(pc, Const(~1)), context.BitwiseAnd(pc, Const(~3)));
+
+ InstEmitFlowHelper.EmitVirtualJump(context, addr, isReturn);
+ }
+
+ public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex == RegisterConsts.ZeroIndex)
+ {
+ OperandType type = context.CurrOp.GetOperandType();
+
+ return type == OperandType.I32 ? Const(0) : Const(0L);
+ }
+ else
+ {
+ return GetIntOrSP(context, regIndex);
+ }
+ }
+
+ public static void SetIntOrZR(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ if (regIndex == RegisterConsts.ZeroIndex)
+ {
+ return;
+ }
+
+ SetIntOrSP(context, regIndex, value);
+ }
+
+ public static Operand GetIntOrSP(ArmEmitterContext context, int regIndex)
+ {
+ Operand value = Register(regIndex, RegisterType.Integer, OperandType.I64);
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ return value;
+ }
+
+ public static void SetIntOrSP(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ Operand reg = Register(regIndex, RegisterType.Integer, OperandType.I64);
+
+ if (value.Type == OperandType.I32)
+ {
+ value = context.ZeroExtend32(OperandType.I64, value);
+ }
+
+ context.Copy(reg, value);
+ }
+
+ public static Operand GetVec(int regIndex)
+ {
+ return Register(regIndex, RegisterType.Vector, OperandType.V128);
+ }
+
+ public static Operand GetFlag(PState stateFlag)
+ {
+ return Register((int)stateFlag, RegisterType.Flag, OperandType.I32);
+ }
+
+ public static Operand GetFpFlag(FPState stateFlag)
+ {
+ return Register((int)stateFlag, RegisterType.FpFlag, OperandType.I32);
+ }
+
+ public static void SetFlag(ArmEmitterContext context, PState stateFlag, Operand value)
+ {
+ context.Copy(GetFlag(stateFlag), value);
+
+ context.MarkFlagSet(stateFlag);
+ }
+
+ public static void SetFpFlag(ArmEmitterContext context, FPState stateFlag, Operand value)
+ {
+ context.Copy(GetFpFlag(stateFlag), value);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemory.cs b/src/ARMeilleure/Instructions/InstEmitMemory.cs
new file mode 100644
index 0000000..840099f
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemory.cs
@@ -0,0 +1,184 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Adr(ArmEmitterContext context)
+ {
+ OpCodeAdr op = (OpCodeAdr)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.Address + (ulong)op.Immediate));
+ }
+
+ public static void Adrp(ArmEmitterContext context)
+ {
+ OpCodeAdr op = (OpCodeAdr)context.CurrOp;
+
+ ulong address = (op.Address & ~0xfffUL) + ((ulong)op.Immediate << 12);
+
+ SetIntOrZR(context, op.Rd, Const(address));
+ }
+
+ public static void Ldr(ArmEmitterContext context) => EmitLdr(context, signed: false);
+ public static void Ldrs(ArmEmitterContext context) => EmitLdr(context, signed: true);
+
+ private static void EmitLdr(ArmEmitterContext context, bool signed)
+ {
+ OpCodeMem op = (OpCodeMem)context.CurrOp;
+
+ Operand address = GetAddress(context);
+
+ if (signed && op.Extend64)
+ {
+ EmitLoadSx64(context, address, op.Rt, op.Size);
+ }
+ else if (signed)
+ {
+ EmitLoadSx32(context, address, op.Rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, address, op.Rt, op.Size);
+ }
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ public static void Ldr_Literal(ArmEmitterContext context)
+ {
+ IOpCodeLit op = (IOpCodeLit)context.CurrOp;
+
+ if (op.Prefetch)
+ {
+ return;
+ }
+
+ if (op.Signed)
+ {
+ EmitLoadSx64(context, Const(op.Immediate), op.Rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, Const(op.Immediate), op.Rt, op.Size);
+ }
+ }
+
+ public static void Ldp(ArmEmitterContext context)
+ {
+ OpCodeMemPair op = (OpCodeMemPair)context.CurrOp;
+
+ void EmitLoad(int rt, Operand ldAddr)
+ {
+ if (op.Extend64)
+ {
+ EmitLoadSx64(context, ldAddr, rt, op.Size);
+ }
+ else
+ {
+ EmitLoadZx(context, ldAddr, rt, op.Size);
+ }
+ }
+
+ Operand address = GetAddress(context);
+ Operand address2 = GetAddress(context, 1L << op.Size);
+
+ EmitLoad(op.Rt, address);
+ EmitLoad(op.Rt2, address2);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ public static void Str(ArmEmitterContext context)
+ {
+ OpCodeMem op = (OpCodeMem)context.CurrOp;
+
+ Operand address = GetAddress(context);
+
+ EmitStore(context, address, op.Rt, op.Size);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
+ public static void Stp(ArmEmitterContext context)
+ {
+ OpCodeMemPair op = (OpCodeMemPair)context.CurrOp;
+
+ Operand address = GetAddress(context);
+ Operand address2 = GetAddress(context, 1L << op.Size);
+
+ EmitStore(context, address, op.Rt, op.Size);
+ EmitStore(context, address2, op.Rt2, op.Size);
+
+ EmitWBackIfNeeded(context, address);
+ }
+
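+ // Computes the effective address: either base register plus immediate (with
+ // optional pre/post-indexing), or base register plus an extended and
+ // optionally scaled index register.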
+ private static Operand GetAddress(ArmEmitterContext context, long addend = 0)
+ {
+ Operand address = default;
+
+ switch (context.CurrOp)
+ {
+ case OpCodeMemImm op:
+ {
+ address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ // Pre-indexing.
+ if (!op.PostIdx)
+ {
+ address = context.Add(address, Const(op.Immediate + addend));
+ }
+ else if (addend != 0)
+ {
+ address = context.Add(address, Const(addend));
+ }
+
+ break;
+ }
+
+ case OpCodeMemReg op:
+ {
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ Operand m = GetExtendedM(context, op.Rm, op.IntType);
+
+ if (op.Shift)
+ {
+ m = context.ShiftLeft(m, Const(op.Size));
+ }
+
+ address = context.Add(n, m);
+
+ if (addend != 0)
+ {
+ address = context.Add(address, Const(addend));
+ }
+
+ break;
+ }
+ }
+
+ return address;
+ }
+
+ private static void EmitWBackIfNeeded(ArmEmitterContext context, Operand address)
+ {
+ // Check whether the current OpCode has post-indexed write back; if so, write the updated address back.
+ if (context.CurrOp is OpCodeMemImm op && op.WBack)
+ {
+ if (op.PostIdx)
+ {
+ address = context.Add(address, Const(op.Immediate));
+ }
+
+ SetIntOrSP(context, op.Rn, address);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemory32.cs b/src/ARMeilleure/Instructions/InstEmitMemory32.cs
new file mode 100644
index 0000000..cee0670
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemory32.cs
@@ -0,0 +1,264 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ private const int ByteSizeLog2 = 0;
+ private const int HWordSizeLog2 = 1;
+ private const int WordSizeLog2 = 2;
+ private const int DWordSizeLog2 = 3;
+
+ [Flags]
+ enum AccessType
+ {
+ Store = 0,
+ Signed = 1,
+ Load = 2,
+ Ordered = 4,
+ Exclusive = 8,
+
+ LoadZx = Load,
+ LoadSx = Load | Signed,
+ }
+
+ public static void Ldm(ArmEmitterContext context)
+ {
+ IOpCode32MemMult op = (IOpCode32MemMult)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ bool writesToPc = (op.RegisterMask & (1 << RegisterAlias.Aarch32Pc)) != 0;
+
+ bool writeBack = op.PostOffset != 0 && (op.Rn != RegisterAlias.Aarch32Pc || !writesToPc);
+
+ if (writeBack)
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ int mask = op.RegisterMask;
+ int offset = 0;
+
+ for (int register = 0; mask != 0; mask >>= 1, register++)
+ {
+ if ((mask & 1) != 0)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+
+ EmitLoadZx(context, address, register, WordSizeLog2);
+
+ offset += 4;
+ }
+ }
+ }
+
+ public static void Ldr(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, WordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrd(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrsb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadSx);
+ }
+
+ public static void Ldrsh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadSx);
+ }
+
+ public static void Stm(ArmEmitterContext context)
+ {
+ IOpCode32MemMult op = (IOpCode32MemMult)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ int mask = op.RegisterMask;
+ int offset = 0;
+
+ for (int register = 0; mask != 0; mask >>= 1, register++)
+ {
+ if ((mask & 1) != 0)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+
+ EmitStore(context, address, register, WordSizeLog2);
+
+ // Note: If Rn is also specified in the register list,
+ // and Rn is the first register in the list, then the
+ // value that is written to memory is the unmodified value,
+ // before the write back. If it is in the list but not
+ // the first register, then the value written to memory
+ // varies between CPUs.
+ if (offset == 0 && op.PostOffset != 0)
+ {
+ // Emit write back after the first write.
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ offset += 4;
+ }
+ }
+ }
+
+ public static void Str(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, WordSizeLog2, AccessType.Store);
+ }
+
+ public static void Strb(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.Store);
+ }
+
+ public static void Strd(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, DWordSizeLog2, AccessType.Store);
+ }
+
+ public static void Strh(ArmEmitterContext context)
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.Store);
+ }
+
+ private static void EmitLoadOrStore(ArmEmitterContext context, int size, AccessType accType)
+ {
+ IOpCode32Mem op = (IOpCode32Mem)context.CurrOp;
+
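+ // op.Index selects pre-indexed (access base +/- offset) versus post-indexed
+ // (access the base), and op.WBack writes the updated address back to Rn.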
+ Operand n = context.Copy(GetIntA32AlignedPC(context, op.Rn));
+ Operand m = GetMemM(context, setCarry: false);
+
+ Operand temp = default;
+
+ if (op.Index || op.WBack)
+ {
+ temp = op.Add
+ ? context.Add(n, m)
+ : context.Subtract(n, m);
+ }
+
+ if (op.WBack)
+ {
+ SetIntA32(context, op.Rn, temp);
+ }
+
+ Operand address;
+
+ if (op.Index)
+ {
+ address = temp;
+ }
+ else
+ {
+ address = n;
+ }
+
+ if ((accType & AccessType.Load) != 0)
+ {
+ void Load(int rt, int offs, int loadSize)
+ {
+ Operand addr = context.Add(address, Const(offs));
+
+ if ((accType & AccessType.Signed) != 0)
+ {
+ EmitLoadSx32(context, addr, rt, loadSize);
+ }
+ else
+ {
+ EmitLoadZx(context, addr, rt, loadSize);
+ }
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Load(op.Rt, 0, WordSizeLog2);
+ Load(op.Rt2, 4, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Load(op.Rt2, 0, WordSizeLog2);
+ Load(op.Rt, 4, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Load(op.Rt, 0, size);
+ }
+ }
+ else
+ {
+ void Store(int rt, int offs, int storeSize)
+ {
+ Operand addr = context.Add(address, Const(offs));
+
+ EmitStore(context, addr, rt, storeSize);
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Store(op.Rt, 0, WordSizeLog2);
+ Store(op.Rt2, 4, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Store(op.Rt2, 0, WordSizeLog2);
+ Store(op.Rt, 4, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Store(op.Rt, 0, size);
+ }
+ }
+ }
+
+ public static void Adr(ArmEmitterContext context)
+ {
+ IOpCode32Adr op = (IOpCode32Adr)context.CurrOp;
+ SetIntA32(context, op.Rd, Const(op.Immediate));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs
new file mode 100644
index 0000000..8c95b33
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs
@@ -0,0 +1,177 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryExHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ [Flags]
+ private enum AccessType
+ {
+ None = 0,
+ Ordered = 1,
+ Exclusive = 2,
+ OrderedEx = Ordered | Exclusive,
+ }
+
+ public static void Clrex(ArmEmitterContext context)
+ {
+ EmitClearExclusive(context);
+ }
+
+ public static void Csdb(ArmEmitterContext context)
+ {
+ // Execute as no-op.
+ }
+
+ public static void Dmb(ArmEmitterContext context) => EmitBarrier(context);
+ public static void Dsb(ArmEmitterContext context) => EmitBarrier(context);
+
+ public static void Ldar(ArmEmitterContext context) => EmitLdr(context, AccessType.Ordered);
+ public static void Ldaxr(ArmEmitterContext context) => EmitLdr(context, AccessType.OrderedEx);
+ public static void Ldxr(ArmEmitterContext context) => EmitLdr(context, AccessType.Exclusive);
+ public static void Ldxp(ArmEmitterContext context) => EmitLdp(context, AccessType.Exclusive);
+ public static void Ldaxp(ArmEmitterContext context) => EmitLdp(context, AccessType.OrderedEx);
+
+ private static void EmitLdr(ArmEmitterContext context, AccessType accType)
+ {
+ EmitLoadEx(context, accType, pair: false);
+ }
+
+ private static void EmitLdp(ArmEmitterContext context, AccessType accType)
+ {
+ EmitLoadEx(context, accType, pair: true);
+ }
+
+ private static void EmitLoadEx(ArmEmitterContext context, AccessType accType, bool pair)
+ {
+ OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+ bool ordered = (accType & AccessType.Ordered) != 0;
+ bool exclusive = (accType & AccessType.Exclusive) != 0;
+
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+
+ Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ if (pair)
+ {
+ // Exclusive loads should be atomic. For pairwise loads, we need to
+ // read all the data at once. For a 32-bit pairwise load, we do a
+ // simple 64-bit load; for a 128-bit load, we need to call a special
+ // method to read 128 bits atomically.
+ if (op.Size == 2)
+ {
+ Operand value = EmitLoadExclusive(context, address, exclusive, 3);
+
+ Operand valueLow = context.ConvertI64ToI32(value);
+
+ valueLow = context.ZeroExtend32(OperandType.I64, valueLow);
+
+ Operand valueHigh = context.ShiftRightUI(value, Const(32));
+
+ SetIntOrZR(context, op.Rt, valueLow);
+ SetIntOrZR(context, op.Rt2, valueHigh);
+ }
+ else if (op.Size == 3)
+ {
+ Operand value = EmitLoadExclusive(context, address, exclusive, 4);
+
+ Operand valueLow = context.VectorExtract(OperandType.I64, value, 0);
+ Operand valueHigh = context.VectorExtract(OperandType.I64, value, 1);
+
+ SetIntOrZR(context, op.Rt, valueLow);
+ SetIntOrZR(context, op.Rt2, valueHigh);
+ }
+ else
+ {
+ throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes.");
+ }
+ }
+ else
+ {
+ // 8, 16, 32 or 64-bit (non-pairwise) load.
+ Operand value = EmitLoadExclusive(context, address, exclusive, op.Size);
+
+ SetIntOrZR(context, op.Rt, value);
+ }
+ }
+
+ public static void Prfm(ArmEmitterContext context)
+ {
+ // Memory Prefetch, execute as no-op.
+ }
+
+ public static void Stlr(ArmEmitterContext context) => EmitStr(context, AccessType.Ordered);
+ public static void Stlxr(ArmEmitterContext context) => EmitStr(context, AccessType.OrderedEx);
+ public static void Stxr(ArmEmitterContext context) => EmitStr(context, AccessType.Exclusive);
+ public static void Stxp(ArmEmitterContext context) => EmitStp(context, AccessType.Exclusive);
+ public static void Stlxp(ArmEmitterContext context) => EmitStp(context, AccessType.OrderedEx);
+
+ private static void EmitStr(ArmEmitterContext context, AccessType accType)
+ {
+ EmitStoreEx(context, accType, pair: false);
+ }
+
+ private static void EmitStp(ArmEmitterContext context, AccessType accType)
+ {
+ EmitStoreEx(context, accType, pair: true);
+ }
+
+ private static void EmitStoreEx(ArmEmitterContext context, AccessType accType, bool pair)
+ {
+ OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+ bool ordered = (accType & AccessType.Ordered) != 0;
+ bool exclusive = (accType & AccessType.Exclusive) != 0;
+
+ Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+ Operand t = GetIntOrZR(context, op.Rt);
+
+ if (pair)
+ {
+ Debug.Assert(op.Size == 2 || op.Size == 3, "Invalid size for pairwise store.");
+
+ Operand t2 = GetIntOrZR(context, op.Rt2);
+
+ Operand value;
+
+ if (op.Size == 2)
+ {
+ value = context.BitwiseOr(t, context.ShiftLeft(t2, Const(32)));
+ }
+ else /* if (op.Size == 3) */
+ {
+ value = context.VectorInsert(context.VectorZero(), t, 0);
+ value = context.VectorInsert(value, t2, 1);
+ }
+
+ EmitStoreExclusive(context, address, value, exclusive, op.Size + 1, op.Rs, a32: false);
+ }
+ else
+ {
+ EmitStoreExclusive(context, address, t, exclusive, op.Size, op.Rs, a32: false);
+ }
+
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+ }
+
+ private static void EmitBarrier(ArmEmitterContext context)
+ {
+ context.MemoryBarrier();
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs b/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs
new file mode 100644
index 0000000..1502188
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs
@@ -0,0 +1,237 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryExHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Clrex(ArmEmitterContext context)
+ {
+ EmitClearExclusive(context);
+ }
+
+ public static void Csdb(ArmEmitterContext context)
+ {
+ // Execute as no-op.
+ }
+
+ public static void Dmb(ArmEmitterContext context) => EmitBarrier(context);
+
+ public static void Dsb(ArmEmitterContext context) => EmitBarrier(context);
+
+ public static void Ldrex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Ldrexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Ldrexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Ldrexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive);
+ }
+
+ public static void Lda(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Ordered);
+ }
+
+ public static void Ldab(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Ordered);
+ }
+
+ public static void Ldaex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldaexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldaexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldaexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Ldah(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Ordered);
+ }
+
+ // Stores.
+
+ public static void Strex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Strexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Strexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Strexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive);
+ }
+
+ public static void Stl(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Ordered);
+ }
+
+ public static void Stlb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Ordered);
+ }
+
+ public static void Stlex(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlexb(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlexd(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlexh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered);
+ }
+
+ public static void Stlh(ArmEmitterContext context)
+ {
+ EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Ordered);
+ }
+
+ private static void EmitExLoadOrStore(ArmEmitterContext context, int size, AccessType accType)
+ {
+ IOpCode32MemEx op = (IOpCode32MemEx)context.CurrOp;
+
+ Operand address = context.Copy(GetIntA32(context, op.Rn));
+
+ var exclusive = (accType & AccessType.Exclusive) != 0;
+ var ordered = (accType & AccessType.Ordered) != 0;
+
+ if ((accType & AccessType.Load) != 0)
+ {
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ // Keep loads atomic: read the whole region with one call, then decompose it
+ // into parts for the registers.
+
+ Operand value = EmitLoadExclusive(context, address, exclusive, size);
+
+ Operand valueLow = context.ConvertI64ToI32(value);
+
+ valueLow = context.ZeroExtend32(OperandType.I64, valueLow);
+
+ Operand valueHigh = context.ShiftRightUI(value, Const(32));
+
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ SetIntA32(context, op.Rt, valueLow);
+ SetIntA32(context, op.Rt2, valueHigh);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ SetIntA32(context, op.Rt2, valueLow);
+ SetIntA32(context, op.Rt, valueHigh);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ SetIntA32(context, op.Rt, EmitLoadExclusive(context, address, exclusive, size));
+ }
+ }
+ else
+ {
+ if (size == DWordSizeLog2)
+ {
+ // Combine the two registers into one 64-bit value, with word order based on endianness.
+
+ Operand lo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt));
+ Operand hi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt2));
+
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Operand leResult = context.BitwiseOr(lo, context.ShiftLeft(hi, Const(32)));
+ EmitStoreExclusive(context, address, leResult, exclusive, size, op.Rd, a32: true);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Operand beResult = context.BitwiseOr(hi, context.ShiftLeft(lo, Const(32)));
+ EmitStoreExclusive(context, address, beResult, exclusive, size, op.Rd, a32: true);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Operand value = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt));
+ EmitStoreExclusive(context, address, value, exclusive, size, op.Rd, a32: true);
+ }
+
+ if (ordered)
+ {
+ EmitBarrier(context);
+ }
+ }
+ }
+
+ private static void EmitBarrier(ArmEmitterContext context)
+ {
+ // Note: This barrier is most likely not necessary, and probably
+ // doesn't make any difference since we need to do a ton of stuff
+ // (software MMU emulation) to read or write anything anyway.
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs b/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
new file mode 100644
index 0000000..7fca5b8
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs
@@ -0,0 +1,173 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitMemoryExHelper
+ {
+ private const int ErgSizeLog2 = 4;
+
+ public static Operand EmitLoadExclusive(ArmEmitterContext context, Operand address, bool exclusive, int size)
+ {
+ if (exclusive)
+ {
+ Operand value;
+
+ if (size == 4)
+ {
+ // Only a 128-bit CAS is guaranteed to have an atomic load; a compare-and-swap
+ // with a zero comparand reads the current value without modifying memory.
+ Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, default, write: false, 4);
+
+ Operand zero = context.VectorZero();
+
+ value = context.CompareAndSwap(physAddr, zero, zero);
+ }
+ else
+ {
+ value = InstEmitMemoryHelper.EmitReadIntAligned(context, address, size);
+ }
+
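+ // The per-thread exclusive monitor (reserved address and value) lives on the
+ // native context, which is passed as the first argument.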
+ Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+ Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+ Operand exValuePtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveValueOffset()));
+
+ context.Store(exAddrPtr, context.BitwiseAnd(address, Const(address.Type, GetExclusiveAddressMask())));
+
+ // Make sure the unused higher bits of the value are cleared.
+ if (size < 3)
+ {
+ context.Store(exValuePtr, Const(0UL));
+ }
+ if (size < 4)
+ {
+ context.Store(context.Add(exValuePtr, Const(exValuePtr.Type, 8L)), Const(0UL));
+ }
+
+ // Store the new exclusive value.
+ context.Store(exValuePtr, value);
+
+ return value;
+ }
+ else
+ {
+ return InstEmitMemoryHelper.EmitReadIntAligned(context, address, size);
+ }
+ }
+
+ public static void EmitStoreExclusive(
+ ArmEmitterContext context,
+ Operand address,
+ Operand value,
+ bool exclusive,
+ int size,
+ int rs,
+ bool a32)
+ {
+ if (size < 3)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ if (exclusive)
+ {
+ // We overwrite one of the registers (Rs), so keep a copy of the address and
+ // value to ensure we are working with the correct ones.
+ address = context.Copy(address);
+ value = context.Copy(value);
+
+ void SetRs(Operand value)
+ {
+ if (a32)
+ {
+ SetIntA32(context, rs, value);
+ }
+ else
+ {
+ SetIntOrZR(context, rs, value);
+ }
+ }
+
+ Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+ Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+ Operand exAddr = context.Load(address.Type, exAddrPtr);
+
+ // STEP 1: Check if we have exclusive access to this memory region. If not, fail and skip the store.
+ Operand maskedAddress = context.BitwiseAnd(address, Const(address.Type, GetExclusiveAddressMask()));
+
+ Operand exFailed = context.ICompareNotEqual(exAddr, maskedAddress);
+
+ Operand lblExit = Label();
+
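+ // The status register is 1 on failure and 0 on success, so assume failure up
+ // front and let the CAS result overwrite it if the store is attempted.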
+ SetRs(Const(1));
+
+ context.BranchIfTrue(lblExit, exFailed);
+
+ // STEP 2: We have exclusive access and the address is valid; attempt the store using CAS.
+ Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, default, write: true, size);
+
+ Operand exValuePtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveValueOffset()));
+ Operand exValue = size switch
+ {
+ 0 => context.Load8(exValuePtr),
+ 1 => context.Load16(exValuePtr),
+ 2 => context.Load(OperandType.I32, exValuePtr),
+ 3 => context.Load(OperandType.I64, exValuePtr),
+ _ => context.Load(OperandType.V128, exValuePtr),
+ };
+
+ Operand currValue = size switch
+ {
+ 0 => context.CompareAndSwap8(physAddr, exValue, value),
+ 1 => context.CompareAndSwap16(physAddr, exValue, value),
+ _ => context.CompareAndSwap(physAddr, exValue, value),
+ };
+
+ // STEP 3: Check if we succeeded by comparing expected and in-memory values.
+ Operand storeFailed;
+
+ if (size == 4)
+ {
+ Operand currValueLow = context.VectorExtract(OperandType.I64, currValue, 0);
+ Operand currValueHigh = context.VectorExtract(OperandType.I64, currValue, 1);
+
+ Operand exValueLow = context.VectorExtract(OperandType.I64, exValue, 0);
+ Operand exValueHigh = context.VectorExtract(OperandType.I64, exValue, 1);
+
+ storeFailed = context.BitwiseOr(
+ context.ICompareNotEqual(currValueLow, exValueLow),
+ context.ICompareNotEqual(currValueHigh, exValueHigh));
+ }
+ else
+ {
+ storeFailed = context.ICompareNotEqual(currValue, exValue);
+ }
+
+ SetRs(storeFailed);
+
+ context.MarkLabel(lblExit);
+ }
+ else
+ {
+ InstEmitMemoryHelper.EmitWriteIntAligned(context, address, value, size);
+ }
+ }
+
+ public static void EmitClearExclusive(ArmEmitterContext context)
+ {
+ Operand arg0 = context.LoadArgument(OperandType.I64, 0);
+
+ Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset()));
+
+ // We store ulong.MaxValue to force any exclusive address check to fail,
+ // since this value can never match an ERG-aligned address.
+ context.Store(exAddrPtr, Const(ulong.MaxValue));
+ }
+
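+ // With ErgSizeLog2 = 4 the mask clears the low 6 bits, giving a 64-byte exclusive reservation granule (ERG).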
+ private static long GetExclusiveAddressMask() => ~((4L << ErgSizeLog2) - 1);
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
new file mode 100644
index 0000000..ace6fe1
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
@@ -0,0 +1,778 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.Translation;
+using ARMeilleure.Translation.PTC;
+using System;
+using System.Reflection;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitMemoryHelper
+ {
+ private const int PageBits = 12;
+ private const int PageMask = (1 << PageBits) - 1;
+
+ private enum Extension
+ {
+ Zx,
+ Sx32,
+ Sx64,
+ }
+
+ public static void EmitLoadZx(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ EmitLoad(context, address, Extension.Zx, rt, size);
+ }
+
+ public static void EmitLoadSx32(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ EmitLoad(context, address, Extension.Sx32, rt, size);
+ }
+
+ public static void EmitLoadSx64(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ EmitLoad(context, address, Extension.Sx64, rt, size);
+ }
+
+ private static void EmitLoad(ArmEmitterContext context, Operand address, Extension ext, int rt, int size)
+ {
+ bool isSimd = IsSimd(context);
+
+ if ((uint)size > (isSimd ? 4 : 3))
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ if (isSimd)
+ {
+ EmitReadVector(context, address, context.VectorZero(), rt, 0, size);
+ }
+ else
+ {
+ EmitReadInt(context, address, rt, size);
+ }
+
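+ // A load that writes the A32 PC behaves as a branch and is handled separately,
+ // so the extension and write-back below are skipped for it.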
+ if (!isSimd && !(context.CurrOp is OpCode32 && rt == State.RegisterAlias.Aarch32Pc))
+ {
+ Operand value = GetInt(context, rt);
+
+ if (ext == Extension.Sx32 || ext == Extension.Sx64)
+ {
+ OperandType destType = ext == Extension.Sx64 ? OperandType.I64 : OperandType.I32;
+
+ switch (size)
+ {
+ case 0:
+ value = context.SignExtend8(destType, value);
+ break;
+ case 1:
+ value = context.SignExtend16(destType, value);
+ break;
+ case 2:
+ value = context.SignExtend32(destType, value);
+ break;
+ }
+ }
+
+ SetInt(context, rt, value);
+ }
+ }
+
+ public static void EmitLoadSimd(
+ ArmEmitterContext context,
+ Operand address,
+ Operand vector,
+ int rt,
+ int elem,
+ int size)
+ {
+ EmitReadVector(context, address, vector, rt, elem, size);
+ }
+
+ public static void EmitStore(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ bool isSimd = IsSimd(context);
+
+ if ((uint)size > (isSimd ? 4 : 3))
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ if (isSimd)
+ {
+ EmitWriteVector(context, address, rt, 0, size);
+ }
+ else
+ {
+ EmitWriteInt(context, address, rt, size);
+ }
+ }
+
+ public static void EmitStoreSimd(
+ ArmEmitterContext context,
+ Operand address,
+ int rt,
+ int elem,
+ int size)
+ {
+ EmitWriteVector(context, address, rt, elem, size);
+ }
+
+ private static bool IsSimd(ArmEmitterContext context)
+ {
+ return context.CurrOp is IOpCodeSimd &&
+ !(context.CurrOp is OpCodeSimdMemMs ||
+ context.CurrOp is OpCodeSimdMemSs);
+ }
+
+ public static Operand EmitReadInt(ArmEmitterContext context, Operand address, int size)
+ {
+ Operand temp = context.AllocateLocal(size == 3 ? OperandType.I64 : OperandType.I32);
+
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
+
+ Operand value = default;
+
+ switch (size)
+ {
+ case 0:
+ value = context.Load8(physAddr);
+ break;
+ case 1:
+ value = context.Load16(physAddr);
+ break;
+ case 2:
+ value = context.Load(OperandType.I32, physAddr);
+ break;
+ case 3:
+ value = context.Load(OperandType.I64, physAddr);
+ break;
+ }
+
+ context.Copy(temp, value);
+
+ if (!context.Memory.Type.IsHostMappedOrTracked())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ context.Copy(temp, EmitReadIntFallback(context, address, size));
+
+ context.MarkLabel(lblEnd);
+ }
+
+ return temp;
+ }
+
+ private static void EmitReadInt(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
+
+ Operand value = default;
+
+ switch (size)
+ {
+ case 0:
+ value = context.Load8(physAddr);
+ break;
+ case 1:
+ value = context.Load16(physAddr);
+ break;
+ case 2:
+ value = context.Load(OperandType.I32, physAddr);
+ break;
+ case 3:
+ value = context.Load(OperandType.I64, physAddr);
+ break;
+ }
+
+ SetInt(context, rt, value);
+
+ if (!context.Memory.Type.IsHostMappedOrTracked())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitReadIntFallback(context, address, rt, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ public static Operand EmitReadIntAligned(ArmEmitterContext context, Operand address, int size)
+ {
+ if ((uint)size > 4)
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ Operand physAddr = EmitPtPointerLoad(context, address, default, write: false, size);
+
+ return size switch
+ {
+ 0 => context.Load8(physAddr),
+ 1 => context.Load16(physAddr),
+ 2 => context.Load(OperandType.I32, physAddr),
+ 3 => context.Load(OperandType.I64, physAddr),
+ _ => context.Load(OperandType.V128, physAddr),
+ };
+ }
+
+ private static void EmitReadVector(
+ ArmEmitterContext context,
+ Operand address,
+ Operand vector,
+ int rt,
+ int elem,
+ int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size);
+
+ Operand value = default;
+
+ switch (size)
+ {
+ case 0:
+ value = context.VectorInsert8(vector, context.Load8(physAddr), elem);
+ break;
+ case 1:
+ value = context.VectorInsert16(vector, context.Load16(physAddr), elem);
+ break;
+ case 2:
+ value = context.VectorInsert(vector, context.Load(OperandType.I32, physAddr), elem);
+ break;
+ case 3:
+ value = context.VectorInsert(vector, context.Load(OperandType.I64, physAddr), elem);
+ break;
+ case 4:
+ value = context.Load(OperandType.V128, physAddr);
+ break;
+ }
+
+ context.Copy(GetVec(rt), value);
+
+ if (!context.Memory.Type.IsHostMappedOrTracked())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitReadVectorFallback(context, address, vector, rt, elem, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ private static Operand VectorCreate(ArmEmitterContext context, Operand value)
+ {
+ return context.VectorInsert(context.VectorZero(), value, 0);
+ }
+
+ private static void EmitWriteInt(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size);
+
+ Operand value = GetInt(context, rt);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ switch (size)
+ {
+ case 0:
+ context.Store8(physAddr, value);
+ break;
+ case 1:
+ context.Store16(physAddr, value);
+ break;
+ case 2:
+ context.Store(physAddr, value);
+ break;
+ case 3:
+ context.Store(physAddr, value);
+ break;
+ }
+
+ if (!context.Memory.Type.IsHostMappedOrTracked())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitWriteIntFallback(context, address, rt, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ public static void EmitWriteIntAligned(ArmEmitterContext context, Operand address, Operand value, int size)
+ {
+ if ((uint)size > 4)
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ Operand physAddr = EmitPtPointerLoad(context, address, default, write: true, size);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ if (size == 0)
+ {
+ context.Store8(physAddr, value);
+ }
+ else if (size == 1)
+ {
+ context.Store16(physAddr, value);
+ }
+ else
+ {
+ context.Store(physAddr, value);
+ }
+ }
+
+ private static void EmitWriteVector(
+ ArmEmitterContext context,
+ Operand address,
+ int rt,
+ int elem,
+ int size)
+ {
+ Operand lblSlowPath = Label();
+ Operand lblEnd = Label();
+
+ Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size);
+
+ Operand value = GetVec(rt);
+
+ switch (size)
+ {
+ case 0:
+ context.Store8(physAddr, context.VectorExtract8(value, elem));
+ break;
+ case 1:
+ context.Store16(physAddr, context.VectorExtract16(value, elem));
+ break;
+ case 2:
+ context.Store(physAddr, context.VectorExtract(OperandType.I32, value, elem));
+ break;
+ case 3:
+ context.Store(physAddr, context.VectorExtract(OperandType.I64, value, elem));
+ break;
+ case 4:
+ context.Store(physAddr, value);
+ break;
+ }
+
+ if (!context.Memory.Type.IsHostMappedOrTracked())
+ {
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold);
+
+ EmitWriteVectorFallback(context, address, rt, elem, size);
+
+ context.MarkLabel(lblEnd);
+ }
+ }
+
+ public static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblSlowPath, bool write, int size)
+ {
+ if (context.Memory.Type.IsHostMapped())
+ {
+ return EmitHostMappedPointer(context, address);
+ }
+ else if (context.Memory.Type.IsHostTracked())
+ {
+ if (address.Type == OperandType.I32)
+ {
+ address = context.ZeroExtend32(OperandType.I64, address);
+ }
+
+ if (context.Memory.Type == MemoryManagerType.HostTracked)
+ {
+ Operand mask = Const(ulong.MaxValue >> (64 - context.Memory.AddressSpaceBits));
+ address = context.BitwiseAnd(address, mask);
+ }
+
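+ // Each page table entry is effectively a per-page delta that, added to the
+ // guest virtual address, yields the host pointer.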
+ Operand ptBase = !context.HasPtc
+ ? Const(context.Memory.PageTablePointer.ToInt64())
+ : Const(context.Memory.PageTablePointer.ToInt64(), Ptc.PageTableSymbol);
+
+ Operand ptOffset = context.ShiftRightUI(address, Const(PageBits));
+
+ return context.Add(address, context.Load(OperandType.I64, context.Add(ptBase, context.ShiftLeft(ptOffset, Const(3)))));
+ }
+
+ int ptLevelBits = context.Memory.AddressSpaceBits - PageBits;
+ int ptLevelSize = 1 << ptLevelBits;
+ int ptLevelMask = ptLevelSize - 1;
+
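+ // Rotating the address right by the access size folds any misalignment bits into
+ // the top of the value, so the range check below also catches unaligned accesses.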
+ Operand addrRotated = size != 0 ? context.RotateRight(address, Const(size)) : address;
+ Operand addrShifted = context.ShiftRightUI(addrRotated, Const(PageBits - size));
+
+ Operand pte = !context.HasPtc
+ ? Const(context.Memory.PageTablePointer.ToInt64())
+ : Const(context.Memory.PageTablePointer.ToInt64(), Ptc.PageTableSymbol);
+
+ Operand pteOffset = context.BitwiseAnd(addrShifted, Const(addrShifted.Type, ptLevelMask));
+
+ if (pteOffset.Type == OperandType.I32)
+ {
+ pteOffset = context.ZeroExtend32(OperandType.I64, pteOffset);
+ }
+
+ pte = context.Load(OperandType.I64, context.Add(pte, context.ShiftLeft(pteOffset, Const(3))));
+
+ if (addrShifted.Type == OperandType.I32)
+ {
+ addrShifted = context.ZeroExtend32(OperandType.I64, addrShifted);
+ }
+
+ // If the VA is out of range, or not aligned to the access size, force PTE to 0 by masking it.
+ pte = context.BitwiseAnd(pte, context.ShiftRightSI(context.Add(addrShifted, Const(-(long)ptLevelSize)), Const(63)));
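+ // (addrShifted - ptLevelSize) is negative only for in-range, aligned addresses;
+ // the arithmetic shift smears the sign bit into an all-ones or all-zeros mask.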
+
+ if (lblSlowPath != default)
+ {
+ if (write)
+ {
+ context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual);
+ pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits (they are still used by the C# memory access code).
+ }
+ else
+ {
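+ // The top PTE bits act as software protection flags; shifting left by one drops
+ // the write-protection bit so the sign check tests the read-tracking bit
+ // instead, while a zero (unmapped) PTE still takes the slow path.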
+ pte = context.ShiftLeft(pte, Const(1));
+ context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual);
+ pte = context.ShiftRightUI(pte, Const(1));
+ }
+ }
+ else
+ {
+ // When no slow-path label is provided for invalid addresses,
+ // we do the validation ourselves and throw if needed.
+
+ Operand lblNotWatched = Label();
+
+ // Is the page currently being tracked for read/write? If so, we need to call SignalMemoryTracking.
+ context.BranchIf(lblNotWatched, pte, Const(0L), Comparison.GreaterOrEqual, BasicBlockFrequency.Cold);
+
+ // Signal memory tracking. The size doesn't matter, as the address is assumed to be size-aligned here.
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SignalMemoryTracking)), address, Const(1UL), Const(write ? 1 : 0));
+ context.MarkLabel(lblNotWatched);
+
+ pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits (they are still used by the C# memory access code).
+
+ Operand lblNonNull = Label();
+
+ // Skip exception if the PTE address is non-null (not zero).
+ context.BranchIfTrue(lblNonNull, pte, BasicBlockFrequency.Cold);
+
+ // The call is not expected to return (it should throw).
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address);
+ context.MarkLabel(lblNonNull);
+ }
+
+ Operand pageOffset = context.BitwiseAnd(address, Const(address.Type, PageMask));
+
+ if (pageOffset.Type == OperandType.I32)
+ {
+ pageOffset = context.ZeroExtend32(OperandType.I64, pageOffset);
+ }
+
+ return context.Add(pte, pageOffset);
+ }
+
+ public static Operand EmitHostMappedPointer(ArmEmitterContext context, Operand address)
+ {
+ if (address.Type == OperandType.I32)
+ {
+ address = context.ZeroExtend32(OperandType.I64, address);
+ }
+
+ if (context.Memory.Type == MemoryManagerType.HostMapped)
+ {
+ Operand mask = Const(ulong.MaxValue >> (64 - context.Memory.AddressSpaceBits));
+ address = context.BitwiseAnd(address, mask);
+ }
+
+ Operand baseAddr = !context.HasPtc
+ ? Const(context.Memory.PageTablePointer.ToInt64())
+ : Const(context.Memory.PageTablePointer.ToInt64(), Ptc.PageTableSymbol);
+
+ return context.Add(baseAddr, address);
+ }
+
+ private static void EmitReadIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ SetInt(context, rt, EmitReadIntFallback(context, address, size));
+ }
+
+ private static Operand EmitReadIntFallback(ArmEmitterContext context, Operand address, int size)
+ {
+ MethodInfo info = null;
+
+ switch (size)
+ {
+ case 0:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte));
+ break;
+ case 1:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16));
+ break;
+ case 2:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32));
+ break;
+ case 3:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64));
+ break;
+ }
+
+ return context.Call(info, address);
+ }
+
+ private static void EmitReadVectorFallback(
+ ArmEmitterContext context,
+ Operand address,
+ Operand vector,
+ int rt,
+ int elem,
+ int size)
+ {
+ MethodInfo info = null;
+
+ switch (size)
+ {
+ case 0:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte));
+ break;
+ case 1:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16));
+ break;
+ case 2:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32));
+ break;
+ case 3:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64));
+ break;
+ case 4:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadVector128));
+ break;
+ }
+
+ Operand value = context.Call(info, address);
+
+ switch (size)
+ {
+ case 0:
+ value = context.VectorInsert8(vector, value, elem);
+ break;
+ case 1:
+ value = context.VectorInsert16(vector, value, elem);
+ break;
+ case 2:
+ value = context.VectorInsert(vector, value, elem);
+ break;
+ case 3:
+ value = context.VectorInsert(vector, value, elem);
+ break;
+ }
+
+ context.Copy(GetVec(rt), value);
+ }
+
+ private static void EmitWriteIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
+ {
+ MethodInfo info = null;
+
+ switch (size)
+ {
+ case 0:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte));
+ break;
+ case 1:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16));
+ break;
+ case 2:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32));
+ break;
+ case 3:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64));
+ break;
+ }
+
+ Operand value = GetInt(context, rt);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ context.Call(info, address, value);
+ }
+
+ private static void EmitWriteVectorFallback(
+ ArmEmitterContext context,
+ Operand address,
+ int rt,
+ int elem,
+ int size)
+ {
+ MethodInfo info = null;
+
+ switch (size)
+ {
+ case 0:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte));
+ break;
+ case 1:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16));
+ break;
+ case 2:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32));
+ break;
+ case 3:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64));
+ break;
+ case 4:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteVector128));
+ break;
+ }
+
+ Operand value = default;
+
+ if (size < 4)
+ {
+ switch (size)
+ {
+ case 0:
+ value = context.VectorExtract8(GetVec(rt), elem);
+ break;
+ case 1:
+ value = context.VectorExtract16(GetVec(rt), elem);
+ break;
+ case 2:
+ value = context.VectorExtract(OperandType.I32, GetVec(rt), elem);
+ break;
+ case 3:
+ value = context.VectorExtract(OperandType.I64, GetVec(rt), elem);
+ break;
+ }
+ }
+ else
+ {
+ value = GetVec(rt);
+ }
+
+ context.Call(info, address, value);
+ }
+
+ private static Operand GetInt(ArmEmitterContext context, int rt)
+ {
+ return context.CurrOp is OpCode32 ? GetIntA32(context, rt) : GetIntOrZR(context, rt);
+ }
+
+ private static void SetInt(ArmEmitterContext context, int rt, Operand value)
+ {
+ if (context.CurrOp is OpCode32)
+ {
+ SetIntA32(context, rt, value);
+ }
+ else
+ {
+ SetIntOrZR(context, rt, value);
+ }
+ }
+
+ // ARM32 helpers.
+ public static Operand GetMemM(ArmEmitterContext context, bool setCarry = true)
+ {
+ return context.CurrOp switch
+ {
+ IOpCode32MemRsImm op => GetMShiftedByImmediate(context, op, setCarry),
+ IOpCode32MemReg op => GetIntA32(context, op.Rm),
+ IOpCode32Mem op => Const(op.Immediate),
+ OpCode32SimdMemImm op => Const(op.Immediate),
+ _ => throw InvalidOpCodeType(context.CurrOp),
+ };
+ }
+
+ private static Exception InvalidOpCodeType(OpCode opCode)
+ {
+ return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\".");
+ }
+
+ public static Operand GetMShiftedByImmediate(ArmEmitterContext context, IOpCode32MemRsImm op, bool setCarry)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+
+ int shift = op.Immediate;
+
+ if (shift == 0)
+ {
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsr:
+ shift = 32;
+ break;
+ case ShiftType.Asr:
+ shift = 32;
+ break;
+ case ShiftType.Ror:
+ shift = 1;
+ break;
+ }
+ }
+
+ if (shift != 0)
+ {
+ // Memory access instructions never update the carry flag here.
+ setCarry = false;
+
+ switch (op.ShiftType)
+ {
+ case ShiftType.Lsl:
+ m = InstEmitAluHelper.GetLslC(context, m, setCarry, shift);
+ break;
+ case ShiftType.Lsr:
+ m = InstEmitAluHelper.GetLsrC(context, m, setCarry, shift);
+ break;
+ case ShiftType.Asr:
+ m = InstEmitAluHelper.GetAsrC(context, m, setCarry, shift);
+ break;
+ case ShiftType.Ror:
+ if (op.Immediate != 0)
+ {
+ m = InstEmitAluHelper.GetRorC(context, m, setCarry, shift);
+ }
+ else
+ {
+ m = InstEmitAluHelper.GetRrxC(context, m, setCarry);
+ }
+ break;
+ }
+ }
+
+ return m;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMove.cs b/src/ARMeilleure/Instructions/InstEmitMove.cs
new file mode 100644
index 0000000..f23ac33
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMove.cs
@@ -0,0 +1,41 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Movk(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ OperandType type = op.GetOperandType();
+
+ Operand res = GetIntOrZR(context, op.Rd);
+
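+ // Clear the 16-bit field selected by the opcode, then OR in the immediate, which the decoder has already shifted into place.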
+ res = context.BitwiseAnd(res, Const(type, ~(0xffffL << op.Bit)));
+
+ res = context.BitwiseOr(res, Const(type, op.Immediate));
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Movn(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), ~op.Immediate));
+ }
+
+ public static void Movz(ArmEmitterContext context)
+ {
+ OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+ SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), op.Immediate));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMul.cs b/src/ARMeilleure/Instructions/InstEmitMul.cs
new file mode 100644
index 0000000..89dc099
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMul.cs
@@ -0,0 +1,101 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics.CodeAnalysis;
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Madd(ArmEmitterContext context) => EmitMul(context, isAdd: true);
+ public static void Msub(ArmEmitterContext context) => EmitMul(context, isAdd: false);
+
+ private static void EmitMul(ArmEmitterContext context, bool isAdd)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand a = GetIntOrZR(context, op.Ra);
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand res = context.Multiply(n, m);
+
+ res = isAdd ? context.Add(a, res) : context.Subtract(a, res);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Smaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedAdd);
+ public static void Smsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedSubtract);
+ public static void Umaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.Add);
+ public static void Umsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.Subtract);
+
+ [Flags]
+ [SuppressMessage("Design", "CA1069: Enums values should not be duplicated")]
+ private enum MullFlags
+ {
+ Subtract = 0,
+ Add = 1 << 0,
+ Signed = 1 << 1,
+
+ SignedAdd = Signed | Add,
+ SignedSubtract = Signed | Subtract,
+ }
+
+ private static void EmitMull(ArmEmitterContext context, MullFlags flags)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand GetExtendedRegister32(int index)
+ {
+ Operand value = GetIntOrZR(context, index);
+
+ if ((flags & MullFlags.Signed) != 0)
+ {
+ return context.SignExtend32(value.Type, value);
+ }
+ else
+ {
+ return context.ZeroExtend32(value.Type, value);
+ }
+ }
+
+ Operand a = GetIntOrZR(context, op.Ra);
+
+ Operand n = GetExtendedRegister32(op.Rn);
+ Operand m = GetExtendedRegister32(op.Rm);
+
+ Operand res = context.Multiply(n, m);
+
+ res = (flags & MullFlags.Add) != 0 ? context.Add(a, res) : context.Subtract(a, res);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ public static void Smulh(ArmEmitterContext context)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = context.Multiply64HighSI(n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+
+ public static void Umulh(ArmEmitterContext context)
+ {
+ OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+ Operand m = GetIntOrZR(context, op.Rm);
+
+ Operand d = context.Multiply64HighUI(n, m);
+
+ SetIntOrZR(context, op.Rd, d);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitMul32.cs b/src/ARMeilleure/Instructions/InstEmitMul32.cs
new file mode 100644
index 0000000..b9966ad
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitMul32.cs
@@ -0,0 +1,378 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ [Flags]
+ private enum MullFlags
+ {
+ Subtract = 1,
+ Add = 1 << 1,
+ Signed = 1 << 2,
+
+ SignedAdd = Signed | Add,
+ SignedSubtract = Signed | Subtract,
+ }
+
+ public static void Mla(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+ Operand a = GetIntA32(context, op.Ra);
+
+ Operand res = context.Add(a, context.Multiply(n, m));
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mls(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+ Operand a = GetIntA32(context, op.Ra);
+
+ Operand res = context.Subtract(a, context.Multiply(n, m));
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Smmla(ArmEmitterContext context)
+ {
+ EmitSmmul(context, MullFlags.SignedAdd);
+ }
+
+ public static void Smmls(ArmEmitterContext context)
+ {
+ EmitSmmul(context, MullFlags.SignedSubtract);
+ }
+
+ public static void Smmul(ArmEmitterContext context)
+ {
+ EmitSmmul(context, MullFlags.Signed);
+ }
+
+ private static void EmitSmmul(ArmEmitterContext context, MullFlags flags)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+
+ Operand res = context.Multiply(n, m);
+
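+ // The accumulator contributes only to the high word, so shift it up by 32 before adding or subtracting.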
+ if (flags.HasFlag(MullFlags.Add) && op.Ra != 0xf)
+ {
+ res = context.Add(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res);
+ }
+ else if (flags.HasFlag(MullFlags.Subtract))
+ {
+ res = context.Subtract(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res);
+ }
+
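+ // The rounding (R) variants add 1 << 31 before the high word is taken.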
+ if (op.R)
+ {
+ res = context.Add(res, Const(0x80000000L));
+ }
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightSI(res, Const(32)));
+
+ EmitGenericAluStoreA32(context, op.Rd, false, hi);
+ }
+
+ public static void Smla__(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+ Operand a = GetIntA32(context, op.Ra);
+
+ if (op.NHigh)
+ {
+ n = context.SignExtend16(OperandType.I64, context.ShiftRightUI(n, Const(16)));
+ }
+ else
+ {
+ n = context.SignExtend16(OperandType.I64, n);
+ }
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
+ Operand toAdd = context.SignExtend32(OperandType.I64, a);
+ res = context.Add(res, toAdd);
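+ // Q is set when the 64-bit result no longer fits in 32 bits, i.e. when truncating and sign-extending it changes the value.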
+ Operand q = context.ICompareNotEqual(res, context.SignExtend32(OperandType.I64, res));
+ res = context.ConvertI64ToI32(res);
+
+ UpdateQFlag(context, q);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
+
+ public static void Smlal(ArmEmitterContext context)
+ {
+ EmitMlal(context, true);
+ }
+
+ public static void Smlal__(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (op.NHigh)
+ {
+ n = context.SignExtend16(OperandType.I64, context.ShiftRightUI(n, Const(16)));
+ }
+ else
+ {
+ n = context.SignExtend16(OperandType.I64, n);
+ }
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
+ Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32));
+ toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo)));
+ res = context.Add(res, toAdd);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ EmitGenericAluStoreA32(context, op.RdHi, false, hi);
+ EmitGenericAluStoreA32(context, op.RdLo, false, lo);
+ }
+
+ public static void Smlaw_(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+ Operand a = GetIntA32(context, op.Ra);
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(context.SignExtend32(OperandType.I64, n), m);
+
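+ // SMLAW keeps the top 32 bits of the 48-bit product; the accumulator is shifted
+ // up by 16 so the final arithmetic shift right by 16 aligns both.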
+ Operand toAdd = context.ShiftLeft(context.SignExtend32(OperandType.I64, a), Const(16));
+ res = context.Add(res, toAdd);
+ res = context.ShiftRightSI(res, Const(16));
+ Operand q = context.ICompareNotEqual(res, context.SignExtend32(OperandType.I64, res));
+ res = context.ConvertI64ToI32(res);
+
+ UpdateQFlag(context, q);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
+
+ public static void Smul__(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (op.NHigh)
+ {
+ n = context.ShiftRightSI(n, Const(16));
+ }
+ else
+ {
+ n = context.SignExtend16(OperandType.I32, n);
+ }
+
+ if (op.MHigh)
+ {
+ m = context.ShiftRightSI(m, Const(16));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I32, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
+
+ public static void Smull(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+
+ Operand res = context.Multiply(n, m);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi);
+ EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo);
+ }
+
+ public static void Smulw_(ArmEmitterContext context)
+ {
+ IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (op.MHigh)
+ {
+ m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16)));
+ }
+ else
+ {
+ m = context.SignExtend16(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(context.SignExtend32(OperandType.I64, n), m);
+
+ res = context.ShiftRightUI(res, Const(16));
+ res = context.ConvertI64ToI32(res);
+
+ EmitGenericAluStoreA32(context, op.Rd, false, res);
+ }
+
+ public static void Umaal(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+ Operand dHi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi));
+ Operand dLo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo));
+
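+ // UMAAL cannot overflow 64 bits: 0xffffffff * 0xffffffff + 2 * 0xffffffff == ulong.MaxValue.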
+ Operand res = context.Multiply(n, m);
+ res = context.Add(res, dHi);
+ res = context.Add(res, dLo);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ EmitGenericAluStoreA32(context, op.RdHi, false, hi);
+ EmitGenericAluStoreA32(context, op.RdLo, false, lo);
+ }
+
+ public static void Umlal(ArmEmitterContext context)
+ {
+ EmitMlal(context, false);
+ }
+
+ public static void Umull(ArmEmitterContext context)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rn));
+ Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rm));
+
+ Operand res = context.Multiply(n, m);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi);
+ EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo);
+ }
+
+ private static void EmitMlal(ArmEmitterContext context, bool signed)
+ {
+ IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ if (signed)
+ {
+ n = context.SignExtend32(OperandType.I64, n);
+ m = context.SignExtend32(OperandType.I64, m);
+ }
+ else
+ {
+ n = context.ZeroExtend32(OperandType.I64, n);
+ m = context.ZeroExtend32(OperandType.I64, m);
+ }
+
+ Operand res = context.Multiply(n, m);
+
+ Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32));
+ toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo)));
+ res = context.Add(res, toAdd);
+
+ Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32)));
+ Operand lo = context.ConvertI64ToI32(res);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi);
+ EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo);
+ }
+
+ private static void UpdateQFlag(ArmEmitterContext context, Operand q)
+ {
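+ // The Q flag is sticky: it is set on saturation and never cleared here.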
+ Operand lblSkipSetQ = Label();
+
+ context.BranchIfFalse(lblSkipSetQ, q);
+
+ SetFlag(context, PState.QFlag, Const(1));
+
+ context.MarkLabel(lblSkipSetQ);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
new file mode 100644
index 0000000..13d9fac
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -0,0 +1,5284 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+// https://www.agner.org/optimize/#vectorclass @ vectori128.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
+ public static void Abs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AbsS);
+ }
+ else
+ {
+ EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Abs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AbsV);
+ }
+ else
+ {
+ EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Add_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64AddS);
+ }
+ else
+ {
+ EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Add_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(addInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Addhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64AddhnV);
+ }
+ else
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false);
+ }
+ }
+
+ public static void Addp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AddpS);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size);
+
+ Operand res = context.Add(ne0, ne1);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size));
+ }
+ }
+
+ public static void Addp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PaddInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Addv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AddvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Cls_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClsV);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ int eSize = 8 << op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize));
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Clz_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClzV);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand res = eSize switch
+ {
+ 8 => Clz_V_I8(context, GetVec(op.Rn)),
+ 16 => Clz_V_I16(context, GetVec(op.Rn)),
+ 32 => Clz_V_I32(context, GetVec(op.Rn)),
+ _ => default,
+ };
+
+ if (res != default)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+
+ res = context.VectorZero();
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize));
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static Operand Clz_V_I8(ArmEmitterContext context, Operand arg)
+ {
+ if (!Optimizations.UseSsse3)
+ {
+ return default;
+ }
+
+ // CLZ nibble table.
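+ // Bytes 0..7 (PSHUFB indices) give 4, 3, 2, 2, 1, 1, 1, 1; indices 8..15 select the zeroed upper lanes and return 0.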
+ Operand clzTable = X86GetScalar(context, 0x01_01_01_01_02_02_03_04);
+
+ Operand maskLow = X86GetAllElements(context, 0x0f_0f_0f_0f);
+ Operand c04 = X86GetAllElements(context, 0x04_04_04_04);
+
+ // CLZ of low 4 bits of elements in arg.
+ Operand loClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, arg);
+
+ // Get the high 4 bits of elements in arg.
+ Operand hiArg = context.AddIntrinsic(Intrinsic.X86Psrlw, arg, Const(4));
+ hiArg = context.AddIntrinsic(Intrinsic.X86Pand, hiArg, maskLow);
+
+ // CLZ of high 4 bits of elements in arg.
+ Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, hiArg);
+
+ // If high 4 bits are not all zero, we discard the CLZ of the low 4 bits.
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqb, hiClz, c04);
+ loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask);
+
+ return context.AddIntrinsic(Intrinsic.X86Paddb, loClz, hiClz);
+ }
+
+ private static Operand Clz_V_I16(ArmEmitterContext context, Operand arg)
+ {
+ if (!Optimizations.UseSsse3)
+ {
+ return default;
+ }
+
+ Operand maskSwap = X86GetElements(context, 0x80_0f_80_0d_80_0b_80_09, 0x80_07_80_05_80_03_80_01);
+ Operand maskLow = X86GetAllElements(context, 0x00ff_00ff);
+ Operand c0008 = X86GetAllElements(context, 0x0008_0008);
+
+ // CLZ pair of high 8 and low 8 bits of elements in arg.
+ Operand hiloClz = Clz_V_I8(context, arg);
+ // Get CLZ of low 8 bits in each pair.
+ Operand loClz = context.AddIntrinsic(Intrinsic.X86Pand, hiloClz, maskLow);
+ // Get CLZ of high 8 bits in each pair.
+ Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, hiloClz, maskSwap);
+
+ // If high 8 bits are not all zero, we discard the CLZ of the low 8 bits.
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqw, hiClz, c0008);
+ loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask);
+
+ return context.AddIntrinsic(Intrinsic.X86Paddw, loClz, hiClz);
+ }
+
+ private static Operand Clz_V_I32(ArmEmitterContext context, Operand arg)
+ {
+ // TODO: Use vplzcntd when AVX-512 is supported.
+ if (!Optimizations.UseSse2)
+ {
+ return default;
+ }
+
+#pragma warning disable IDE0055 // Disable formatting
+ Operand AddVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Paddd, op0, op1);
+ Operand SubVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Psubd, op0, op1);
+ Operand ShiftRightVectorUI32(Operand op0, int imm8) => context.AddIntrinsic(Intrinsic.X86Psrld, op0, Const(imm8));
+ Operand OrVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Por, op0, op1);
+ Operand AndVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Pand, op0, op1);
+ Operand NotVector(Operand op0) => context.AddIntrinsic(Intrinsic.X86Pandn, op0, context.VectorOne());
+#pragma warning restore IDE0055
+
+ Operand c55555555 = X86GetAllElements(context, 0x55555555);
+ Operand c33333333 = X86GetAllElements(context, 0x33333333);
+ Operand c0f0f0f0f = X86GetAllElements(context, 0x0f0f0f0f);
+ Operand c0000003f = X86GetAllElements(context, 0x0000003f);
+
+ Operand tmp0;
+ Operand tmp1;
+ Operand res;
+
+ // Set all bits below the highest set bit to 1.
+ res = OrVector(ShiftRightVectorUI32(arg, 1), arg);
+ res = OrVector(ShiftRightVectorUI32(res, 2), res);
+ res = OrVector(ShiftRightVectorUI32(res, 4), res);
+ res = OrVector(ShiftRightVectorUI32(res, 8), res);
+ res = OrVector(ShiftRightVectorUI32(res, 16), res);
+
+ // Make leading 0s into leading 1s.
+ res = NotVector(res);
+
+ // The set bits are now exactly the original leading zeros, so the population count equals the CLZ.
+ tmp0 = ShiftRightVectorUI32(res, 1);
+ tmp0 = AndVector(tmp0, c55555555);
+ res = SubVectorI32(res, tmp0);
+
+ tmp0 = ShiftRightVectorUI32(res, 2);
+ tmp0 = AndVector(tmp0, c33333333);
+ tmp1 = AndVector(res, c33333333);
+ res = AddVectorI32(tmp0, tmp1);
+
+ tmp0 = ShiftRightVectorUI32(res, 4);
+ tmp0 = AddVectorI32(tmp0, res);
+ res = AndVector(tmp0, c0f0f0f0f);
+
+ tmp0 = ShiftRightVectorUI32(res, 8);
+ res = AddVectorI32(tmp0, res);
+
+ tmp0 = ShiftRightVectorUI32(res, 16);
+ res = AddVectorI32(tmp0, res);
+
+ res = AndVector(res, c0000003f);
+
+ return res;
+ }
+
+ public static void Cnt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64CntV);
+ }
+ else
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
+
+ Operand de;
+
+ if (Optimizations.UsePopCnt)
+ {
+ de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne);
+ }
+ else
+ {
+ de = EmitCountSetBits8(context, ne);
+ }
+
+ res = EmitVectorInsert(context, res, de, index, 0);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fabd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FabdS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, true, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, false, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+
+ return EmitUnaryMathCall(context, nameof(Math.Abs), res);
+ });
+ }
+ }
+
+ public static void Fabd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FabdV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, true, true);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm));
+
+ res = EmitFloatAbs(context, res, false, true);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+
+ return EmitUnaryMathCall(context, nameof(Math.Abs), res);
+ });
+ }
+ }
+
+ public static void Fabs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FabsS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (op.Size == 0)
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, false);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Abs), op1);
+ });
+ }
+ }
+
+ public static void Fabs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FabsV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, true);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, true);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Abs), op1);
+ });
+ }
+ }
+
+ public static void Fadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Fadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Faddp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FaddpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(op.Rn), GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(op.Rn), GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Faddp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd;
+
+ return context.AddIntrinsic(addInst, op1, op2);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
+ });
+ }
+ }
+
+ public static void Fdiv_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FdivS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2);
+ });
+ }
+ }
+
+ public static void Fdiv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FdivV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Divide(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmadd_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
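+ // FMADD is a + n * m with a single rounding: VFMADD231 preserves that, while the mul-then-add fallback rounds twice, acceptable here only because this path is gated on FastFP.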
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addss, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmax_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
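+ // x86 MAXSS/MAXPS return the second source when either input is NaN, so NaN operands are screened first to match Arm's propagate-NaN rule.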
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
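+ // FMAXNM treats a quiet NaN on one side as absent and returns the other operand; the MaxMinNum helper applies that selection before taking the max.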
+ EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnmp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxnmpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnmp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxnmv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxnmvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
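+ // Across-vector reduction: the helper folds all lanes pairwise with the supplied maxNM operation down to a single element.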
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmaxv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmin_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnmp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminnmpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnmp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminnmv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminnmvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminpS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: true, op1, op2);
+ });
+ }
+ else
+ {
+ EmitScalarPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fminv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminvV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmla_Se(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaSe);
+ }
+ else if (Optimizations.UseFma)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
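+ // SHUFPS packs four 2-bit lane selectors into imm8 (SHUFPD below, two 1-bit ones); replicating op.Index broadcasts element [Index] of m to every lane before the fused multiply-add.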
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Fmla_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlaV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmla_Ve(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaVe);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmls_Se(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsSe);
+ }
+ else if (Optimizations.UseFma)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
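+ // FMLS element form is d - n * m[Index]; VFNMADD231 computes -(n * m) + d, which is exactly that.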
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, d, n, res);
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Fmls_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
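+ // FMLS: d = d - n * m. With FMA this is one VFNMADD231; otherwise multiply and subtract from d, twice-rounded but permitted under FastFP.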
+ if (sizeF == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmls_Ve(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsVe);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmsub_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
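+ // FMSUB is a - n * m; VFNMADD231 (-(n * m) + a) maps to it directly.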
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, a, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fmul_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmul_Se(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulSe);
+ }
+ else
+ {
+ EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Fmul_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmul_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulVe);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ int shuffleMask = op.Index | op.Index << 1;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulxS);
+ }
+ else
+ {
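+ // FMULX differs from FMUL only for 0 * infinity, where it returns +/-2.0 instead of the default NaN, so there is no fast SSE path; only AdvSimd or soft float implement it.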
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_Se(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulxSe);
+ }
+ else
+ {
+ EmitScalarBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulxV);
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fmulx_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulxVe);
+ }
+ else
+ {
+ EmitVectorBinaryOpByElemF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2);
+ });
+ }
+ }
+
+ public static void Fneg_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FnegS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
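+ // Negation is a sign-bit flip: XOR against -0.0, which has only the sign bit set.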
+ if (op.Size == 0)
+ {
+ Operand mask = X86GetScalar(context, -0f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand mask = X86GetScalar(context, -0d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Fneg_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FnegV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand mask = X86GetAllElements(context, -0f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand mask = X86GetAllElements(context, -0d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Fnmadd_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
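+ // FNMADD is -a - n * m; VFNMSUB231 computes -(n * m) - a, a direct match. The fallback negates a via the sign mask, then subtracts the product.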
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231ss, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0f);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231sd, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0d);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fnmsub_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand a = GetVec(op.Ra);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
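+ // FNMSUB is n * m - a; VFMSUB231 computes (n * m) - a. The fallback adds the product to the negated a.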
+ if (op.Size == 0)
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmsub231ss, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0f);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addss, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper96(res));
+ }
+ else /* if (op.Size == 1) */
+ {
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfmsub231sd, a, n, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0d);
+ Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Addsd, aNeg, res);
+ }
+
+ context.Copy(d, context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Fnmul_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FnmulS);
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+ }
+ }
+
+ public static void Frecpe_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpeS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
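+ // Only single precision is accelerated (x86 has no double-precision RCP); the Round32Exp8 helper then coarsens the ~12-bit RCPSS estimate toward the 8-bit precision of Arm's FPRecipEstimate.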
+ Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frecpe_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrecpeV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
+ Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frecps_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrecpsS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
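+ // FRECPS is the Newton-Raphson reciprocal step, 2 - n * m. Arm defines the 0 * infinity special cases to yield exactly 2.0, which the RecipStepSelect helper substitutes in.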
+ if (sizeF == 0)
+ {
+ Operand mask = X86GetScalar(context, 2f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand mask = X86GetScalar(context, 2d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Frecps_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrecpsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
+ Operand mask = X86GetAllElements(context, 2f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand mask = X86GetAllElements(context, 2d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, mask, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res);
+ }
+
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Frecpx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpxS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1);
+ });
+ }
+ }
+
+ public static void Frinta_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintaS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearestAway);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
+ });
+ }
+ }
+
+ public static void Frinta_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintaV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearestAway);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1);
+ });
+ }
+ }
+
+ public static void Frinti_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintiS);
+ }
+ else
+ {
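+ // FRINTI rounds according to the current FPCR rounding mode, so it always goes through EmitRoundByRMode rather than a fixed-mode SSE4.1 round.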
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frinti_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintiV);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frintm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintmS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Floor), op1);
+ });
+ }
+ }
+
+ public static void Frintm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintmV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Floor), op1);
+ });
+ }
+ }
+
+ public static void Frintn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintnS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.ToEven, op1);
+ });
+ }
+ }
+
+ public static void Frintn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintnV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundMathCall(context, MidpointRounding.ToEven, op1);
+ });
+ }
+ }
+
+ public static void Frintp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintpS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1);
+ });
+ }
+ }
+
+ public static void Frintp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintpV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1);
+ });
+ }
+ }
+
+ public static void Frintx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintxS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frintx_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintxV);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ public static void Frintz_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintzS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Truncate), op1);
+ });
+ }
+ }
+
+ public static void Frintz_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintzV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitUnaryMathCall(context, nameof(Math.Truncate), op1);
+ });
+ }
+ }
+
+ public static void Frsqrte_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrsqrteS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
+ // RSQRTSS handles subnormals as zero, which differs from Arm, so we can't use it here.
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Sqrtss, GetVec(op.Rn));
+ res = context.AddIntrinsic(Intrinsic.X86Rcpss, res);
+ res = EmitSse41Round32Exp8OpF(context, res, scalar: true);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frsqrte_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrsqrteV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
+ {
+ // RSQRTPS handles subnormals as zero, which differs from Arm, so we can't use it here.
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Sqrtps, GetVec(op.Rn));
+ res = context.AddIntrinsic(Intrinsic.X86Rcpps, res);
+ res = EmitSse41Round32Exp8OpF(context, res, scalar: false);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1);
+ });
+ }
+ }
+
+ public static void Frsqrts_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrsqrtsS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
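+ // FRSQRTS is the Newton-Raphson reciprocal-square-root step, (3 - n * m) / 2, with Arm's fixed 1.5 result for the 0 * infinity special cases substituted by the select helper.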
+ if (sizeF == 0)
+ {
+ Operand maskHalf = X86GetScalar(context, 0.5f);
+ Operand maskThree = X86GetScalar(context, 3f);
+ Operand maskOneHalf = X86GetScalar(context, 1.5f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand maskHalf = X86GetScalar(context, 0.5d);
+ Operand maskThree = X86GetScalar(context, 3d);
+ Operand maskOneHalf = X86GetScalar(context, 1.5d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF);
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
+ }
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Frsqrts_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrsqrtsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int sizeF = op.Size & 1;
+
+ Operand res;
+
+ if (sizeF == 0)
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5f);
+ Operand maskThree = X86GetAllElements(context, 3f);
+ Operand maskOneHalf = X86GetAllElements(context, 1.5f);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5d);
+ Operand maskThree = X86GetAllElements(context, 3d);
+ Operand maskOneHalf = X86GetAllElements(context, 1.5d);
+
+ if (Optimizations.UseFma)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, maskThree, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
+ res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2);
+ });
+ }
+ }
+
+ public static void Fsqrt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FsqrtS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1);
+ });
+ }
+ }
+
+ public static void Fsqrt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FsqrtV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd);
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1);
+ });
+ }
+ }
+
+ public static void Fsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+ });
+ }
+ }
+
+ public static void Fsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FsubV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2);
+ });
+ }
+ }
+
+ public static void Mla_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlaV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorMul_AddSub(context, AddSub.Add);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mla_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlaVe);
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mls_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorMul_AddSub(context, AddSub.Subtract);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mls_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlsVe);
+ }
+ else
+ {
+ EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Mul_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64MulV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41VectorMul_AddSub(context, AddSub.None);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Mul_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64MulVe);
+ }
+ else
+ {
+ EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Neg_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64NegS);
+ }
+ else
+ {
+ EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Neg_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64NegV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
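+ // Integer negate as 0 - x, using the PSUB variant for the element size.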
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(subInst, context.VectorZero(), GetVec(op.Rn));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Pmull_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseArm64Pmull)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV);
+ }
+ else if (Optimizations.UsePclmulqdq && op.Size == 3)
+ {
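+ // PCLMULQDQ's imm8 selects which qword of each source to multiply: 0x00 takes the low halves (PMULL), 0x11 the high halves (PMULL2).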
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ int imm8 = op.RegisterSize == RegisterSize.Simd64 ? 0b0000_0000 : 0b0001_0001;
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, n, m, Const(imm8));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ n = context.VectorZeroUpper64(n);
+ m = context.VectorZeroUpper64(m);
+ }
+ else /* if (op.RegisterSize == RegisterSize.Simd128) */
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Operand res = context.VectorZero();
+
+ if (op.Size == 0)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pmovzxbw, n);
+ m = context.AddIntrinsic(Intrinsic.X86Pmovzxbw, m);
+
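+ // Bit-serial carryless multiply: for each bit i, broadcast bit i of every byte of n across its 16-bit lane (shift it to the sign bit, arithmetic-shift back), then XOR in m << i wherever that bit is set.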
+ for (int i = 0; i < 8; i++)
+ {
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Psllw, n, Const(15 - i));
+ mask = context.AddIntrinsic(Intrinsic.X86Psraw, mask, Const(15));
+
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Psllw, m, Const(i));
+ tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp);
+ }
+ }
+ else /* if (op.Size == 3) */
+ {
+ Operand zero = context.VectorZero();
+
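+ // Same bit-serial scheme over 64 bits: isolate bit i of n's source qword (replicated to both halves), expand it to an all-ones mask via 0 - bit, and XOR in the 128-bit shift m << i.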
+ for (int i = 0; i < 64; i++)
+ {
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Movlhps, n, n);
+ mask = context.AddIntrinsic(Intrinsic.X86Psllq, mask, Const(63 - i));
+ mask = context.AddIntrinsic(Intrinsic.X86Psrlq, mask, Const(63));
+ mask = context.AddIntrinsic(Intrinsic.X86Psubq, zero, mask);
+
+ Operand tmp = EmitSse2Sll_128(context, m, i);
+ tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ if (op.Size == 0)
+ {
+ res = context.VectorZero();
+
+ int part = op.RegisterSize == RegisterSize.Simd64 ? 0 : 8;
+
+ for (int index = 0; index < 8; index++)
+ {
+ Operand ne = context.VectorExtract8(n, part + index);
+ Operand me = context.VectorExtract8(m, part + index);
+
+ Operand de = EmitPolynomialMultiply(context, ne, me, 8);
+
+ res = EmitVectorInsert(context, res, de, index, 1);
+ }
+ }
+ else /* if (op.Size == 3) */
+ {
+ int part = op.RegisterSize == RegisterSize.Simd64 ? 0 : 1;
+
+ Operand ne = context.VectorExtract(OperandType.I64, n, part);
+ Operand me = context.VectorExtract(OperandType.I64, m, part);
+
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Raddhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RaddhnV);
+ }
+ else
+ {
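+ // Add at double width, then narrow by keeping the high half of each element, with rounding (the R in RADDHN).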
+ EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true);
+ }
+ }
+
+ public static void Rsubhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RsubhnV);
+ }
+ else
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true);
+ }
+ }
+
+ public static void Saba_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabaV);
+ }
+ else
+ {
+ EmitVectorTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Sabal_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabalV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Sabd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ EmitSse41VectorSabdOp(context, op, n, m, isLong: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Sabdl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdlV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
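+                // For the '2' form (128-bit source), shift the upper half down first; the sign-extended
+                // operands are then processed at the doubled element size.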
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0
+ ? Intrinsic.X86Pmovsxbw
+ : Intrinsic.X86Pmovsxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ EmitSse41VectorSabdOp(context, op, n, m, isLong: true);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Sadalp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64SadalpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: true, accumulate: true);
+ }
+ }
+
+ public static void Saddl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddlV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Saddlp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: true, accumulate: false);
+ }
+ }
+
+ public static void Saddlv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlvV);
+ }
+ else
+ {
+ EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Saddw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Shadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
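+                // Halving add without intermediate overflow: (n & m) + ((n ^ m) >> 1),
+                // using an arithmetic shift for the signed case.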
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psraw : Intrinsic.X86Psrad;
+
+ res2 = context.AddIntrinsic(shiftInst, res2, Const(1));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, res2);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ return context.ShiftRightSI(context.Add(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Shsub_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShsubV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
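+                // Signed halving subtract via PAVG: bias both inputs into unsigned range;
+                // since PAVG rounds up, nPlusMask - avg(nPlusMask, mPlusMask) == (n - m) >> 1.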
+ Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ Operand nPlusMask = context.AddIntrinsic(addInst, n, mask);
+ Operand mPlusMask = context.AddIntrinsic(addInst, m, mask);
+
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, nPlusMask, mPlusMask);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ res = context.AddIntrinsic(subInst, nPlusMask, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ return context.ShiftRightSI(context.Subtract(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Smax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic maxInst = X86PmaxsInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smaxp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PmaxsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smaxv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SmaxvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic minInst = X86PminsInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(minInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Sminp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PminsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Sminv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SminvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true));
+ }
+ }
+
+ public static void Smlal_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlalV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(addInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smlal_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlalVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smlsl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlslV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0 ? Intrinsic.X86Pmovsxbw : Intrinsic.X86Pmovsxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(subInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smlsl_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlslVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Smull_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmullV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Smull_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64SmullVe);
+ }
+ else
+ {
+ EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Sqabs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqabsS);
+ }
+ else
+ {
+ EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Sqabs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqabsV);
+ }
+ else
+ {
+ EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ public static void Sqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+ }
+ }
+
+ public static void Sqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add);
+ }
+ }
+
+ public static void Sqdmulh_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+ }
+ }
+
+ public static void Sqdmulh_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+ }
+ }
+
+ public static void Sqdmulh_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqdmulhVe);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false));
+ }
+ }
+
+ public static void Sqneg_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqnegS);
+ }
+ else
+ {
+ EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Sqneg_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqnegV);
+ }
+ else
+ {
+ EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Sqrdmulh_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+ }
+ }
+
+ public static void Sqrdmulh_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+ }
+ }
+
+ public static void Sqrdmulh_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqrdmulhVe);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true));
+ }
+ }
+
+ public static void Sqsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqsubS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Sqsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqsubV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Sqxtn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnS);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
+ }
+ }
+
+ public static void Sqxtn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnV);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
+ }
+ }
+
+ public static void Sqxtun_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunS);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
+ }
+ }
+
+ public static void Sqxtun_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunV);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
+ }
+ }
+
+ public static void Srhadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrhaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
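+                // Rounding halving add: bias the inputs to unsigned, let PAVG compute
+                // (a + b + 1) >> 1, then add the bias back.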
+ Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u));
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ Operand nMinusMask = context.AddIntrinsic(subInst, n, mask);
+ Operand mMinusMask = context.AddIntrinsic(subInst, m, mask);
+
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, nMinusMask, mMinusMask);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, mask, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx(context, (op1, op2) =>
+ {
+ Operand res = context.Add(op1, op2);
+
+ res = context.Add(res, Const(1L));
+
+ return context.ShiftRightSI(res, Const(1));
+ });
+ }
+ }
+
+ public static void Ssubl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsublV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Ssubw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsubwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovsxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Sub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SubS);
+ }
+ else
+ {
+ EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Sub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SubV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(subInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Subhn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SubhnV);
+ }
+ else
+ {
+ EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false);
+ }
+ }
+
+ public static void Suqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Suqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Uaba_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabaV);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Uabal_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabalV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3)));
+ });
+ }
+ }
+
+ public static void Uabd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ EmitSse41VectorUabdOp(context, op, n, m, isLong: false);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Uabdl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdlV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0
+ ? Intrinsic.X86Pmovzxbw
+ : Intrinsic.X86Pmovzxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ EmitSse41VectorUabdOp(context, op, n, m, isLong: true);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) =>
+ {
+ return EmitAbs(context, context.Subtract(op1, op2));
+ });
+ }
+ }
+
+ public static void Uadalp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64UadalpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: false, accumulate: true);
+ }
+ }
+
+ public static void Uaddl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddlV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Uaddlp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlpV);
+ }
+ else
+ {
+ EmitAddLongPairwise(context, signed: false, accumulate: false);
+ }
+ }
+
+ public static void Uaddlv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlvV);
+ }
+ else
+ {
+ EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Uaddw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Uhadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
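+                // Halving add without intermediate overflow: (n & m) + ((n ^ m) >> 1),
+                // using a logical shift for the unsigned case.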
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psrlw : Intrinsic.X86Psrld;
+
+ res2 = context.AddIntrinsic(shiftInst, res2, Const(1));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, res2);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.ShiftRightUI(context.Add(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Uhsub_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhsubV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
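+                // PAVG rounds up, so n - avg(n, m) yields the truncated halved difference (n - m) >> 1.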
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ res = context.AddIntrinsic(subInst, n, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.ShiftRightUI(context.Subtract(op1, op2), Const(1));
+ });
+ }
+ }
+
+ public static void Umax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic maxInst = X86PmaxuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umaxp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PmaxuInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umaxv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UmaxvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic minInst = X86PminuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(minInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Uminp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminpV);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp(context, X86PminuInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Uminv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UminvV);
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false));
+ }
+ }
+
+ public static void Umlal_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlalV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(addInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umlal_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlalVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umlsl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlslV);
+ }
+ else if (Optimizations.UseSse41 && op.Size < 2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = op.Size == 0 ? Intrinsic.X86Pmovzxbw : Intrinsic.X86Pmovzxwd;
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+ Operand res = context.AddIntrinsic(mullInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(d, context.AddIntrinsic(subInst, d, res));
+ }
+ else
+ {
+ EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umlsl_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlslVe);
+ }
+ else
+ {
+ EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ }
+
+ public static void Umull_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmullV);
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Umull_Ve(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64UmullVe);
+ }
+ else
+ {
+ EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Uqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+ }
+ }
+
+ public static void Uqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+ }
+ }
+
+ public static void Uqsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqsubS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Uqsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqsubV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+ }
+ }
+
+ public static void Uqxtn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnS);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx);
+ }
+ }
+
+ public static void Uqxtn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnV);
+ }
+ else
+ {
+ EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx);
+ }
+ }
+
+ public static void Urhadd_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrhaddV);
+ }
+ else if (Optimizations.UseSse2 && op.Size < 2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
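+                // PAVGB/PAVGW already compute the unsigned rounding average (n + m + 1) >> 1 directly.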
+ Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+ Operand res = context.AddIntrinsic(avgInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ Operand res = context.Add(op1, op2);
+
+ res = context.Add(res, Const(1L));
+
+ return context.ShiftRightUI(res, Const(1));
+ });
+ }
+ }
+
+ public static void Usqadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddS);
+ }
+ else
+ {
+ EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Usqadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddV);
+ }
+ else
+ {
+ EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+ }
+ }
+
+ public static void Usubl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsublV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ n = context.AddIntrinsic(movInst, n);
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Usubw_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsubwV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+ }
+
+ Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+ m = context.AddIntrinsic(movInst, m);
+
+ Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m));
+ }
+ else
+ {
+ EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ private static Operand EmitAbs(ArmEmitterContext context, Operand value)
+ {
+ Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0));
+
+ return context.ConditionalSelect(isPositive, value, context.Negate(value));
+ }
+
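+        // Adds adjacent element pairs of Rn at the doubled element size; when accumulate is set,
+        // the widened sums are added into the existing Rd elements (SADALP/UADALP vs SADDLP/UADDLP).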
+ private static void EmitAddLongPairwise(ArmEmitterContext context, bool signed, bool accumulate)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
+ Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);
+
+ Operand e = context.Add(ne0, ne1);
+
+ if (accumulate)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
+
+ e = context.Add(e, de);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
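+        // Returns the high half of 2 * n * m, the core of SQDMULH/SQRDMULH; the caller applies saturation.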
+ private static Operand EmitDoublingMultiplyHighHalf(
+ ArmEmitterContext context,
+ Operand n,
+ Operand m,
+ bool round)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand res = context.Multiply(n, m);
+
+ if (!round)
+ {
+ res = context.ShiftRightSI(res, Const(eSize - 1));
+ }
+ else
+ {
+ long roundConst = 1L << (eSize - 1);
+
+ res = context.ShiftLeft(res, Const(1));
+
+ res = context.Add(res, Const(roundConst));
+
+ res = context.ShiftRightSI(res, Const(eSize));
+
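+                // When both inputs are the most negative value, doubling the product overflows the 64-bit
+                // intermediate; negating the shifted result lets the caller's saturation clamp it to the maximum.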
+ Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue));
+
+ res = context.ConditionalSelect(isIntMin, context.Negate(res), res);
+ }
+
+ return res;
+ }
+
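+        // Narrows each widened result of emit(Rn, Rm) to its high half; round adds 1 << (eSize - 1) first.
+        // The '2' variants (128-bit register size) write the upper half of Rd and keep the lower half intact.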
+ private static void EmitHighNarrow(ArmEmitterContext context, Func2I emit, bool round)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int elems = 8 >> op.Size;
+ int eSize = 8 << op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ long roundConst = 1L << (eSize - 1);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size + 1);
+
+ Operand de = emit(ne, me);
+
+ if (round)
+ {
+ de = context.Add(de, Const(roundConst));
+ }
+
+ de = context.ShiftRightUI(de, Const(eSize));
+
+ res = EmitVectorInsert(context, res, de, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+
+ private static Operand EmitMax64Op(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand cmp = signed
+ ? context.ICompareGreaterOrEqual(op1, op2)
+ : context.ICompareGreaterOrEqualUI(op1, op2);
+
+ return context.ConditionalSelect(cmp, op1, op2);
+ }
+
+ private static Operand EmitMin64Op(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand cmp = signed
+ ? context.ICompareLessOrEqual(op1, op2)
+ : context.ICompareLessOrEqualUI(op1, op2);
+
+ return context.ConditionalSelect(cmp, op1, op2);
+ }
+
+ private static void EmitSse41ScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
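+            // SSE4.1 ROUNDSS/ROUNDSD has no "to nearest, ties away" encoding, so that mode is emulated.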
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss;
+
+ res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: true);
+ }
+
+ if ((op.Size & 1) != 0)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ else
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitSse41VectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundpd : Intrinsic.X86Roundps;
+
+ res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: false);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
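+        // Rounds single-precision lanes to 8 fraction bits: add 0x4000 to round at bit 15, then mask with
+        // 0xFFFF8000 so only the sign, exponent and top 8 mantissa bits survive; NaN/Inf lanes pass through.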
+ private static Operand EmitSse41Round32Exp8OpF(ArmEmitterContext context, Operand value, bool scalar)
+ {
+ Operand roundMask;
+ Operand truncMask;
+ Operand expMask;
+
+ if (scalar)
+ {
+ roundMask = X86GetScalar(context, 0x4000);
+ truncMask = X86GetScalar(context, unchecked((int)0xFFFF8000));
+ expMask = X86GetScalar(context, 0x7F800000);
+ }
+ else
+ {
+ roundMask = X86GetAllElements(context, 0x4000);
+ truncMask = X86GetAllElements(context, unchecked((int)0xFFFF8000));
+ expMask = X86GetAllElements(context, 0x7F800000);
+ }
+
+ Operand oValue = value;
+ Operand masked = context.AddIntrinsic(Intrinsic.X86Pand, value, expMask);
+ Operand isNaNInf = context.AddIntrinsic(Intrinsic.X86Pcmpeqd, masked, expMask);
+
+ value = context.AddIntrinsic(Intrinsic.X86Paddd, value, roundMask);
+ value = context.AddIntrinsic(Intrinsic.X86Pand, value, truncMask);
+
+ return context.AddIntrinsic(Intrinsic.X86Blendvps, value, oValue, isNaNInf);
+ }
+
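+        // Selects mask over res for lanes where one operand is zero and the other is infinity; the left
+        // shift by one drops the sign bit so +/-0 and +/-Inf compare equal.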
+ private static Operand EmitSse41RecipStepSelectOpF(
+ ArmEmitterContext context,
+ Operand n,
+ Operand m,
+ Operand res,
+ Operand mask,
+ bool scalar,
+ int sizeF)
+ {
+ Intrinsic cmpOp;
+ Intrinsic shlOp;
+ Intrinsic blendOp;
+ Operand zero = context.VectorZero();
+ Operand expMask;
+
+ if (sizeF == 0)
+ {
+ cmpOp = Intrinsic.X86Pcmpeqd;
+ shlOp = Intrinsic.X86Pslld;
+ blendOp = Intrinsic.X86Blendvps;
+ expMask = scalar ? X86GetScalar(context, 0x7F800000 << 1) : X86GetAllElements(context, 0x7F800000 << 1);
+ }
+ else /* if (sizeF == 1) */
+ {
+ cmpOp = Intrinsic.X86Pcmpeqq;
+ shlOp = Intrinsic.X86Psllq;
+ blendOp = Intrinsic.X86Blendvpd;
+ expMask = scalar ? X86GetScalar(context, 0x7FF0000000000000L << 1) : X86GetAllElements(context, 0x7FF0000000000000L << 1);
+ }
+
+ n = context.AddIntrinsic(shlOp, n, Const(1));
+ m = context.AddIntrinsic(shlOp, m, Const(1));
+
+ Operand nZero = context.AddIntrinsic(cmpOp, n, zero);
+ Operand mZero = context.AddIntrinsic(cmpOp, m, zero);
+ Operand nInf = context.AddIntrinsic(cmpOp, n, expMask);
+ Operand mInf = context.AddIntrinsic(cmpOp, m, expMask);
+
+ Operand nmZero = context.AddIntrinsic(Intrinsic.X86Por, nZero, mZero);
+ Operand nmInf = context.AddIntrinsic(Intrinsic.X86Por, nInf, mInf);
+ Operand nmZeroInf = context.AddIntrinsic(Intrinsic.X86Pand, nmZero, nmInf);
+
+ return context.AddIntrinsic(blendOp, res, mask, nmZeroInf);
+ }
+
+ public static void EmitSse2VectorIsNaNOpF(
+ ArmEmitterContext context,
+ Operand opF,
+ out Operand qNaNMask,
+ out Operand sNaNMask,
+ bool? isQNaN = null)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
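+            // A lane is NaN when it compares unordered with itself; the quiet bit (bit 22 for single,
+            // bit 51 for double) then separates quiet NaNs from signaling ones.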
+ if ((op.Size & 1) == 0)
+ {
+ const int QBit = 22;
+
+ Operand qMask = X86GetAllElements(context, 1 << QBit);
+
+ Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmpps, opF, opF, Const((int)CmpCondition.UnorderedQ));
+
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
+ mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal));
+
+ qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : default;
+ sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnps, mask2, mask1) : default;
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ const int QBit = 51;
+
+ Operand qMask = X86GetAllElements(context, 1L << QBit);
+
+ Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmppd, opF, opF, Const((int)CmpCondition.UnorderedQ));
+
+ Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
+ mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal));
+
+ qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : default;
+ sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnpd, mask2, mask1) : default;
+ }
+ }
+
+ public static Operand EmitSse41ProcessNaNsOpF(
+ ArmEmitterContext context,
+ Func2I emit,
+ bool scalar,
+ Operand n = default,
+ Operand m = default)
+ {
+ Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n;
+ Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m;
+
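+            // ARM FPProcessNaNs priority: a signaling NaN in n wins, then one in m, then a quiet NaN in n,
+            // then one in m; the selected NaN is quieted by OR-ing in the quiet bit below.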
+ EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out Operand nSNaNMask);
+ EmitSse2VectorIsNaNOpF(context, mCopy, out _, out Operand mSNaNMask, isQNaN: false);
+
+ int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
+
+ if (sizeF == 0)
+ {
+ const int QBit = 22;
+
+ Operand qMask = scalar ? X86GetScalar(context, 1 << QBit) : X86GetAllElements(context, 1 << QBit);
+
+ Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+ resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
+
+ Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, nCopy, resNaNMask);
+ resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+
+ Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Blendvps, resNaN, emit(nCopy, mCopy), resMask);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ else /* if (sizeF == 1) */
+ {
+ const int QBit = 51;
+
+ Operand qMask = scalar ? X86GetScalar(context, 1L << QBit) : X86GetAllElements(context, 1L << QBit);
+
+ Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+ resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
+
+ Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, nCopy, resNaNMask);
+ resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+
+ Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Blendvpd, resNaN, emit(nCopy, mCopy), resMask);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ }
+
+ private static Operand EmitSse2VectorMaxMinOpF(ArmEmitterContext context, Operand n, Operand m, bool isMax)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
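+            // MAXPS/MINPS do not order +/-0, so the result sign is rebuilt from the inputs: AND of the
+            // sign bits for max (negative only if both are), OR for min (negative if either is).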
+ if ((op.Size & 1) == 0)
+ {
+ Operand mask = X86GetAllElements(context, -0f);
+
+ Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxps : Intrinsic.X86Minps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
+
+ Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
+ resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign);
+
+ return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ Operand mask = X86GetAllElements(context, -0d);
+
+ Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
+
+ Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
+ resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign);
+
+ return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
+ }
+ }
+
+ private static Operand EmitSse41MaxMinNumOpF(
+ ArmEmitterContext context,
+ bool isMaxNum,
+ bool scalar,
+ Operand n = default,
+ Operand m = default)
+ {
+ Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n;
+ Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m;
+
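+            // FMAXNM/FMINNM semantics: a lane where exactly one operand is a quiet NaN has it replaced
+            // with the infinity that loses the comparison, so the max/min picks the number; NaN-vs-NaN
+            // lanes still go through the normal NaN propagation path.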
+ EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out _, isQNaN: true);
+ EmitSse2VectorIsNaNOpF(context, mCopy, out Operand mQNaNMask, out _, isQNaN: true);
+
+ int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand negInfMask = scalar
+ ? X86GetScalar(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity)
+ : X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
+
+ nCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, nCopy, negInfMask, nMask);
+ mCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, negInfMask, mMask);
+
+ Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+ }, scalar: scalar, nCopy, mCopy);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand negInfMask = scalar
+ ? X86GetScalar(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity)
+ : X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
+
+ nCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, nCopy, negInfMask, nMask);
+ mCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, negInfMask, mMask);
+
+ Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+ }, scalar: scalar, nCopy, mCopy);
+
+ if (n != default || m != default)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return default;
+ }
+ }
+
+ private enum AddSub
+ {
+ None,
+ Add,
+ Subtract,
+ }
+
+ private static void EmitSse41VectorMul_AddSub(ArmEmitterContext context, AddSub addSub)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
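+            // SSE has no 8-bit multiply: compute odd-byte products in the high halves of 16-bit lanes
+            // and even-byte products with a plain PMULLW, then byte-blend the two results together.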
+ if (op.Size == 0)
+ {
+ Operand ns8 = context.AddIntrinsic(Intrinsic.X86Psrlw, n, Const(8));
+ Operand ms8 = context.AddIntrinsic(Intrinsic.X86Psrlw, m, Const(8));
+
+ res = context.AddIntrinsic(Intrinsic.X86Pmullw, ns8, ms8);
+
+ res = context.AddIntrinsic(Intrinsic.X86Psllw, res, Const(8));
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
+
+ Operand mask = X86GetAllElements(context, 0x00FF00FF);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pblendvb, res, res2, mask);
+ }
+ else if (op.Size == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pmulld, n, m);
+ }
+
+ Operand d = GetVec(op.Rd);
+
+ if (addSub == AddSub.Add)
+ {
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, d, res);
+ }
+ else if (addSub == AddSub.Subtract)
+ {
+ Intrinsic subInst = X86PsubInstruction[op.Size];
+
+ res = context.AddIntrinsic(subInst, d, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+
+ private static void EmitSse41VectorSabdOp(
+ ArmEmitterContext context,
+ OpCodeSimdReg op,
+ Operand n,
+ Operand m,
+ bool isLong)
+ {
+ int size = isLong ? op.Size + 1 : op.Size;
+
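+            // Signed absolute difference: a compare-greater mask selects n - m where n > m and m - n elsewhere.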
+ Intrinsic cmpgtInst = X86PcmpgtInstruction[size];
+
+ Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m);
+
+ Intrinsic subInst = X86PsubInstruction[size];
+
+ Operand res = context.AddIntrinsic(subInst, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);
+
+ Operand res2 = context.AddIntrinsic(subInst, m, n);
+
+ res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+
+ if (!isLong && op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitSse41VectorUabdOp(
+ ArmEmitterContext context,
+ OpCodeSimdReg op,
+ Operand n,
+ Operand m,
+ bool isLong)
+ {
+ int size = isLong ? op.Size + 1 : op.Size;
+
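+ // SSE has no unsigned compare-greater, so the mask is derived from Pmaxu:
+ // max(m, n) == m holds iff m >= n (unsigned), and inverting that compare with
+ // Pandn against all ones yields the (n > m) mask used to select |n - m| below.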
+ Intrinsic maxInst = X86PmaxuInstruction[size];
+
+ Operand max = context.AddIntrinsic(maxInst, m, n);
+
+ Intrinsic cmpeqInst = X86PcmpeqInstruction[size];
+
+ Operand cmpMask = context.AddIntrinsic(cmpeqInst, max, m);
+
+ Operand onesMask = X86GetAllElements(context, -1L);
+
+ cmpMask = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, onesMask);
+
+ Intrinsic subInst = X86PsubInstruction[size];
+
+ Operand res = context.AddIntrinsic(subInst, n, m);
+ Operand res2 = context.AddIntrinsic(subInst, m, n);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res);
+ res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+
+ if (!isLong && op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static Operand EmitSse2Sll_128(ArmEmitterContext context, Operand op, int shift)
+ {
+ // The upper part of op is assumed to be zero.
+ Debug.Assert(shift >= 0 && shift < 64);
+
+ if (shift == 0)
+ {
+ return op;
+ }
+
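+ // The 128-bit shift is split into two 64-bit ones: Pslldq moves the low qword
+ // into the high qword, and the Psrlq by (64 - shift) keeps only the bits that
+ // carry across the 64-bit boundary; Por merges them with the plain Psllq result.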
+ Operand high = context.AddIntrinsic(Intrinsic.X86Pslldq, op, Const(8));
+ high = context.AddIntrinsic(Intrinsic.X86Psrlq, high, Const(64 - shift));
+
+ Operand low = context.AddIntrinsic(Intrinsic.X86Psllq, op, Const(shift));
+
+ return context.AddIntrinsic(Intrinsic.X86Por, high, low);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
new file mode 100644
index 0000000..c807fc8
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -0,0 +1,1736 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vabd_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorBinaryOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U);
+ }
+
+ public static void Vabdl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U);
+ }
+
+ public static void Vabs_S(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FabsS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ return EmitFloatAbs(context, m, (op.Size & 1) == 0, false);
+ });
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Abs), op1));
+ }
+ }
+
+ public static void Vabs_V(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FabsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return EmitFloatAbs(context, m, (op.Size & 1) == 0, true);
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Abs), op1));
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => EmitAbs(context, op1));
+ }
+ }
+
+ private static Operand EmitAbs(ArmEmitterContext context, Operand value)
+ {
+ Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0));
+
+ return context.ConditionalSelect(isPositive, value, context.Negate(value));
+ }
+
+ public static void Vadd_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2));
+ }
+ }
+
+ public static void Vadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FaddV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => context.Add(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2));
+ }
+ }
+
+ public static void Vadd_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PaddInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.Add(op1, op2));
+ }
+ }
+
+ public static void Vaddl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
+ }
+
+ public static void Vaddw_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp;
+
+ EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
+ }
+
+ public static void Vcnt(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount();
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de;
+ Operand me = EmitVectorExtractZx32(context, op.Qm, op.Im + index, op.Size);
+
+ if (Optimizations.UsePopCnt)
+ {
+ de = context.AddIntrinsicInt(Intrinsic.X86Popcnt, me);
+ }
+ else
+ {
+ de = EmitCountSetBits8(context, me);
+ }
+
+ res = EmitVectorInsert(context, res, de, op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vdup(ArmEmitterContext context)
+ {
+ OpCode32SimdDupGP op = (OpCode32SimdDupGP)context.CurrOp;
+
+ Operand insert = GetIntA32(context, op.Rt);
+
+ // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts.
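+ // E.g. for size 0: 0xAB * 0x0101010101010101 = 0xABABABABABABABAB.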
+ insert = op.Size switch
+ {
+ 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
+ 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
+ 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
+ _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\"."),
+ };
+
+ InsertScalar(context, op.Vd, insert);
+ if (op.Q)
+ {
+ InsertScalar(context, op.Vd + 1, insert);
+ }
+ }
+
+ public static void Vdup_1(ArmEmitterContext context)
+ {
+ OpCode32SimdDupElem op = (OpCode32SimdDupElem)context.CurrOp;
+
+ Operand insert = EmitVectorExtractZx32(context, op.Vm >> 1, ((op.Vm & 1) << (3 - op.Size)) + op.Index, op.Size);
+
+ // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts.
+ insert = op.Size switch
+ {
+ 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
+ 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
+ 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
+ _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\"."),
+ };
+
+ InsertScalar(context, op.Vd, insert);
+ if (op.Q)
+ {
+ InsertScalar(context, op.Vd | 1, insert);
+ }
+ }
+
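+ // Builds a 16-byte Pshufb control mask: lanes [start, start + length) select
+ // consecutive source bytes beginning at startByte; every other lane is 0x80,
+ // which Pshufb turns into zero. Returned as (high qword, low qword).
+ // E.g. MaskHelperByteSequence(0, 5, 3) selects source bytes 3..7 into lanes 0..4.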
+ private static (long, long) MaskHelperByteSequence(int start, int length, int startByte)
+ {
+ int end = start + length;
+ int b = startByte;
+ long result = 0;
+ long result2 = 0;
+ for (int i = 0; i < 8; i++)
+ {
+ result |= (long)((i >= end || i < start) ? 0x80 : b++) << (i * 8);
+ }
+ for (int i = 8; i < 16; i++)
+ {
+ result2 |= (long)((i >= end || i < start) ? 0x80 : b++) << ((i - 8) * 8);
+ }
+ return (result2, result);
+ }
+
+ public static void Vext(ArmEmitterContext context)
+ {
+ OpCode32SimdExt op = (OpCode32SimdExt)context.CurrOp;
+ int elems = op.GetBytesCount();
+ int byteOff = op.Immediate;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ // Writing low to high of d: start into n, overlap into m.
+ // Then rotate n down by imm, m up by (elems - imm).
+ // Then OR them together for the result.
+
+ (long nMaskHigh, long nMaskLow) = MaskHelperByteSequence(0, elems - byteOff, byteOff);
+ (long mMaskHigh, long mMaskLow) = MaskHelperByteSequence(elems - byteOff, byteOff, 0);
+ Operand nMask, mMask;
+ if (!op.Q)
+ {
+ // Do the same operation to the bytes in the top doubleword too, as our target could be in either.
+ nMaskHigh = nMaskLow + 0x0808080808080808L;
+ mMaskHigh = mMaskLow + 0x0808080808080808L;
+ }
+ nMask = X86GetElements(context, nMaskHigh, nMaskLow);
+ mMask = X86GetElements(context, mMaskHigh, mMaskLow);
+ Operand nPart = context.AddIntrinsic(Intrinsic.X86Pshufb, n, nMask);
+ Operand mPart = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mMask);
+
+ return context.AddIntrinsic(Intrinsic.X86Por, nPart, mPart);
+ });
+ }
+ else
+ {
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand extract;
+
+ if (byteOff >= elems)
+ {
+ extract = EmitVectorExtractZx32(context, op.Qm, op.Im + (byteOff - elems), op.Size);
+ }
+ else
+ {
+ extract = EmitVectorExtractZx32(context, op.Qn, op.In + byteOff, op.Size);
+ }
+ byteOff++;
+
+ res = EmitVectorInsert(context, res, extract, op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
+
+ public static void Vfma_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfma_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps);
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfms_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfms_V(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps);
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfnma_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfnms_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseFma)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vhadd(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U)
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.ShiftRightUI(context.Add(op1, op2), Const(1)));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.ShiftRightSI(context.Add(op1, op2), Const(1)));
+ }
+ }
+
+ public static void Vmov_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpF32(context, 0, 0);
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => op1);
+ }
+ }
+
+ public static void Vmovn(ArmEmitterContext context)
+ {
+ EmitVectorUnaryNarrowOp32(context, (op1) => op1);
+ }
+
+ public static void Vneg_S(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FnegS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ if ((op.Size & 1) == 0)
+ {
+ Operand mask = X86GetScalar(context, -0f);
+ return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
+ }
+ else
+ {
+ Operand mask = X86GetScalar(context, -0d);
+ return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
+ }
+ });
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Vnmul_S(ArmEmitterContext context)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FnmulS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpSimd32(context, (n, m) =>
+ {
+ if ((op.Size & 1) == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
+ Operand mask = X86GetScalar(context, -0f);
+ return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
+ }
+ else
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
+ Operand mask = X86GetScalar(context, -0d);
+ return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
+ }
+ });
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
+ }
+ }
+
+ public static void Vnmla_S(ArmEmitterContext context)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(context.Negate(op1), context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), context.Negate(op1), res);
+ });
+ }
+ }
+
+ public static void Vnmls_S(ArmEmitterContext context)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Add(context.Negate(op1), context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), context.Negate(op1), res);
+ });
+ }
+ }
+
+ public static void Vneg_V(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FnegV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ if ((op.Size & 1) == 0)
+ {
+ Operand mask = X86GetAllElements(context, -0f);
+ return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
+ }
+ else
+ {
+ Operand mask = X86GetAllElements(context, -0d);
+ return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
+ }
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => context.Negate(op1));
+ }
+ }
+
+ public static void Vdiv_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FdivS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Divide(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmaxnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmaxnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, true, true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2));
+ }
+ }
+
+ public static void Vmaxnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, true, false);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxNumFpscr), op1, op2));
+ }
+ }
+
+ public static void Vminnm_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FminnmS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, false, true);
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2));
+ }
+ }
+
+ public static void Vminnm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminnmV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse41MaxMinNumOpF32(context, false, false);
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinNumFpscr), op1, op2));
+ }
+ }
+
+ public static void Vmax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxFpscr), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmax_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PmaxuInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreaterUI(op1, op2), op1, op2));
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PmaxsInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreater(op1, op2), op1, op2));
+ }
+ }
+ }
+
+ public static void Vmin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmin_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminuInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLessUI(op1, op2), op1, op2));
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminsInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLess(op1, op2), op1, op2));
+ }
+ }
+ }
+
+ public static void Vmla_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, res);
+ });
+ }
+ }
+
+ public static void Vmla_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vmla_I(ArmEmitterContext context)
+ {
+ EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ }
+
+ public static void Vmla_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3));
+ }
+ }
+ else
+ {
+ EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false);
+ }
+ }
+
+ public static void Vmlal_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorTernaryLongOpI32(context, (d, n, m) => context.Add(d, context.Multiply(n, m)), !op.U);
+ }
+
+ public static void Vmls_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Subtract(op1, context.Multiply(op2, op3));
+ });
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, res);
+ });
+ }
+ }
+
+ public static void Vmls_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vmls_I(ArmEmitterContext context)
+ {
+ EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ }
+
+ public static void Vmls_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ }
+ else
+ {
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3));
+ }
+ }
+ else
+ {
+ EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false);
+ }
+ }
+
+ public static void Vmlsl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorTernaryLongOpI32(context, (opD, op1, op2) => context.Subtract(opD, context.Multiply(op1, op2)), !op.U);
+ }
+
+ public static void Vmul_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmulS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmul_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmulV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulFpscr), op1, op2);
+ });
+ }
+ }
+
+ public static void Vmul_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U) // This instruction is always signed, U indicates polynomial mode.
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ }
+
+ public static void Vmul_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ }
+ else if (Optimizations.FastFP)
+ {
+ EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
+ else
+ {
+ EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulFpscr), op1, op2));
+ }
+ }
+ else
+ {
+ EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false);
+ }
+ }
+
+ public static void Vmull_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ EmitVectorByScalarLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U);
+ }
+
+ public static void Vmull_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ if (op.Polynomial)
+ {
+ if (op.Size == 0) // P8
+ {
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size), false);
+ }
+ else /* if (op.Size == 2) // P64 */
+ {
+ Operand ne = context.VectorExtract(OperandType.I64, GetVec(op.Qn), op.Vn & 1);
+ Operand me = context.VectorExtract(OperandType.I64, GetVec(op.Qm), op.Vm & 1);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U);
+ }
+ }
+
+ public static void Vpadd_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FaddpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Addps);
+ }
+ else
+ {
+ EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2));
+ }
+ }
+
+ public static void Vpadd_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp32(context, X86PaddInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
+ }
+ }
+
+ public static void Vpadal(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ EmitVectorPairwiseTernaryLongOpI32(context, (op1, op2, op3) => context.Add(context.Add(op1, op2), op3), op.Opc != 1);
+ }
+
+ public static void Vpaddl(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ EmitVectorPairwiseLongOpI32(context, (op1, op2) => context.Add(op1, op2), (op.Opc & 1) == 0);
+ }
+
+ public static void Vpmax_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FmaxpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps);
+ }
+ else
+ {
+ EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat64.FPMaxFpscr), op1, op2));
+ }
+ }
+
+ public static void Vpmax_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp32(context, op.U ? X86PmaxuInstruction : X86PmaxsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpI32(context, (op1, op2) =>
+ {
+ Operand greater = op.U ? context.ICompareGreaterUI(op1, op2) : context.ICompareGreater(op1, op2);
+ return context.ConditionalSelect(greater, op1, op2);
+ }, !op.U);
+ }
+ }
+
+ public static void Vpmin_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FminpV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps);
+ }
+ else
+ {
+ EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2));
+ }
+ }
+
+ public static void Vpmin_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitSsse3VectorPairwiseOp32(context, op.U ? X86PminuInstruction : X86PminsInstruction);
+ }
+ else
+ {
+ EmitVectorPairwiseOpI32(context, (op1, op2) =>
+ {
+ Operand less = op.U ? context.ICompareLessUI(op1, op2) : context.ICompareLess(op1, op2);
+ return context.ConditionalSelect(less, op1, op2);
+ }, !op.U);
+ }
+ }
+
+ public static void Vqadd(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitSaturatingAddSubBinaryOp(context, add: true, !op.U);
+ }
+
+ public static void Vqdmulh(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ int eSize = 8 << op.Size;
+
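+ // Doubling multiply returning the high half: sat((2 * op1 * op2) >> eSize) is
+ // computed as (op1 * op2) >> (eSize - 1); saturation only triggers when both
+ // inputs are the minimum negative value.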
+ EmitVectorBinaryOpI32(context, (op1, op2) =>
+ {
+ if (op.Size == 2)
+ {
+ op1 = context.SignExtend32(OperandType.I64, op1);
+ op2 = context.SignExtend32(OperandType.I64, op2);
+ }
+
+ Operand res = context.Multiply(op1, op2);
+ res = context.ShiftRightSI(res, Const(eSize - 1));
+ res = EmitSatQ(context, res, eSize, signedSrc: true, signedDst: true);
+
+ if (op.Size == 2)
+ {
+ res = context.ConvertI64ToI32(res);
+ }
+
+ return res;
+ }, signed: true);
+ }
+
+ public static void Vqmovn(ArmEmitterContext context)
+ {
+ OpCode32SimdMovn op = (OpCode32SimdMovn)context.CurrOp;
+
+ bool signed = !op.Q;
+
+ EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signed, signed), signed);
+ }
+
+ public static void Vqmovun(ArmEmitterContext context)
+ {
+ OpCode32SimdMovn op = (OpCode32SimdMovn)context.CurrOp;
+
+ EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signedSrc: true, signedDst: false), signed: true);
+ }
+
+ public static void Vqrdmulh(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ int eSize = 8 << op.Size;
+
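+ // Rounding variant of Vqdmulh: adding 1 << (eSize - 2) before the
+ // (eSize - 1)-bit shift rounds the doubled product to nearest instead of
+ // truncating it.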
+ EmitVectorBinaryOpI32(context, (op1, op2) =>
+ {
+ if (op.Size == 2)
+ {
+ op1 = context.SignExtend32(OperandType.I64, op1);
+ op2 = context.SignExtend32(OperandType.I64, op2);
+ }
+
+ Operand res = context.Multiply(op1, op2);
+ res = context.Add(res, Const(res.Type, 1L << (eSize - 2)));
+ res = context.ShiftRightSI(res, Const(eSize - 1));
+ res = EmitSatQ(context, res, eSize, signedSrc: true, signedDst: true);
+
+ if (op.Size == 2)
+ {
+ res = context.ConvertI64ToI32(res);
+ }
+
+ return res;
+ }, signed: true);
+ }
+
+ public static void Vqsub(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitSaturatingAddSubBinaryOp(context, add: false, !op.U);
+ }
+
+ public static void Vrev(ArmEmitterContext context)
+ {
+ OpCode32SimdRev op = (OpCode32SimdRev)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitVectorUnaryOpSimd32(context, (op1) =>
+ {
+ Operand mask;
+ switch (op.Size)
+ {
+ case 3:
+ // Rev64
+ switch (op.Opc)
+ {
+ case 0:
+ mask = X86GetElements(context, 0x08090a0b0c0d0e0fL, 0x0001020304050607L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ case 1:
+ mask = X86GetElements(context, 0x09080b0a0d0c0f0eL, 0x0100030205040706L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ case 2:
+ return context.AddIntrinsic(Intrinsic.X86Shufps, op1, op1, Const(1 | (0 << 2) | (3 << 4) | (2 << 6)));
+ }
+ break;
+ case 2:
+ // Rev32
+ switch (op.Opc)
+ {
+ case 0:
+ mask = X86GetElements(context, 0x0c0d0e0f_08090a0bL, 0x04050607_00010203L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ case 1:
+ mask = X86GetElements(context, 0x0d0c0f0e_09080b0aL, 0x05040706_01000302L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ }
+ break;
+ case 1:
+ // Rev16
+ mask = X86GetElements(context, 0x0e0f_0c0d_0a0b_0809L, 0x_0607_0405_0203_0001L);
+ return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
+ }
+
+ throw new InvalidOperationException("Invalid VREV Opcode + Size combo."); // Should be unreachable.
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpZx32(context, (op1) =>
+ {
+ switch (op.Opc)
+ {
+ case 0:
+ switch (op.Size) // Swap bytes.
+ {
+ case 1:
+ return InstEmitAluHelper.EmitReverseBytes16_32Op(context, op1);
+ case 2:
+ case 3:
+ return context.ByteSwap(op1);
+ }
+ break;
+ case 1:
+ switch (op.Size)
+ {
+ case 2:
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff0000)), Const(16)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x0000ffff)), Const(16)));
+ case 3:
+ return context.BitwiseOr(
+ context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff000000000000ul)), Const(48)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x000000000000fffful)), Const(48))),
+ context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0x0000ffff00000000ul)), Const(16)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000ffff0000ul)), Const(16))));
+ }
+ break;
+ case 2:
+ // Swap upper and lower halves.
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffffffff00000000ul)), Const(32)),
+ context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000fffffffful)), Const(32)));
+ }
+
+ throw new InvalidOperationException("Invalid VREV Opcode + Size combo."); // Should be unreachable.
+ });
+ }
+ }
+
+ public static void Vrecpe(ArmEmitterContext context)
+ {
+ OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp;
+
+ if (op.F)
+ {
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrecpeV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
+ {
+ EmitVectorUnaryOpF32(context, Intrinsic.X86Rcpps, 0);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPRecipEstimateFpscr), op1);
+ });
+ }
+ }
+ else
+ {
+ throw new NotImplementedException("Integer Vrecpe not currently implemented.");
+ }
+ }
+
+ public static void Vrecps(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrecpsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ bool single = (op.Size & 1) == 0;
+
+ // (2 - (n*m))
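+ // VRECPS supplies the Newton-Raphson correction factor for a reciprocal
+ // estimate: with x ~ 1/d, the refined estimate is x * (2 - d * x).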
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ if (single)
+ {
+ Operand maskTwo = X86GetAllElements(context, 2f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+
+ return context.AddIntrinsic(Intrinsic.X86Subps, maskTwo, res);
+ }
+ else
+ {
+ Operand maskTwo = X86GetAllElements(context, 2d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+
+ return context.AddIntrinsic(Intrinsic.X86Subpd, maskTwo, res);
+ }
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStep), op1, op2);
+ });
+ }
+ }
+
+ public static void Vrhadd(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
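+ // Rounding halving add: (op1 + op2 + 1) >> 1. 32-bit elements are widened to
+ // 64 bits first so the intermediate sum cannot overflow.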
+ EmitVectorBinaryOpI32(context, (op1, op2) =>
+ {
+ if (op.Size == 2)
+ {
+ op1 = context.ZeroExtend32(OperandType.I64, op1);
+ op2 = context.ZeroExtend32(OperandType.I64, op2);
+ }
+
+ Operand res = context.Add(context.Add(op1, op2), Const(op1.Type, 1L));
+ res = context.ShiftRightUI(res, Const(1));
+
+ if (op.Size == 2)
+ {
+ res = context.ConvertI64ToI32(res);
+ }
+
+ return res;
+ }, !op.U);
+ }
+
+ public static void Vrsqrte(ArmEmitterContext context)
+ {
+ OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp;
+
+ if (op.F)
+ {
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrsqrteV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
+ {
+ EmitVectorUnaryOpF32(context, Intrinsic.X86Rsqrtps, 0);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPRSqrtEstimateFpscr), op1);
+ });
+ }
+ }
+ else
+ {
+ throw new NotImplementedException("Integer Vrsqrte not currently implemented.");
+ }
+ }
+
+ public static void Vrsqrts(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrsqrtsV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ bool single = (op.Size & 1) == 0;
+
+ // (3 - (n*m)) / 2
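+ // VRSQRTS supplies the Newton-Raphson correction factor for a reciprocal
+ // square root estimate: with x ~ 1/sqrt(d), the refined estimate is
+ // x * (3 - d * x * x) / 2.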
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ if (single)
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5f);
+ Operand maskThree = X86GetAllElements(context, 3f);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
+ return context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
+ }
+ else
+ {
+ Operand maskHalf = X86GetAllElements(context, 0.5d);
+ Operand maskThree = X86GetAllElements(context, 3d);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
+ return context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
+ }
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStep), op1, op2);
+ });
+ }
+ }
+
+ public static void Vsel(ArmEmitterContext context)
+ {
+ OpCode32SimdSel op = (OpCode32SimdSel)context.CurrOp;
+
+ Operand condition = default;
+
+ switch (op.Cc)
+ {
+ case OpCode32SimdSelMode.Eq:
+ condition = GetCondTrue(context, Condition.Eq);
+ break;
+ case OpCode32SimdSelMode.Ge:
+ condition = GetCondTrue(context, Condition.Ge);
+ break;
+ case OpCode32SimdSelMode.Gt:
+ condition = GetCondTrue(context, Condition.Gt);
+ break;
+ case OpCode32SimdSelMode.Vs:
+ condition = GetCondTrue(context, Condition.Vs);
+ break;
+ }
+
+ EmitScalarBinaryOpI32(context, (op1, op2) =>
+ {
+ return context.ConditionalSelect(condition, op1, op2);
+ });
+ }
+
+ public static void Vsqrt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FsqrtS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpF32(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1);
+ });
+ }
+ }
+
+ public static void Vsub_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FsubS);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else
+ {
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Vsub_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FsubV);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Vsub_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+ EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PsubInstruction[op.Size], op1, op2));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.Subtract(op1, op2));
+ }
+ }
+
+ public static void Vsubl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U);
+ }
+
+ public static void Vsubw_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp;
+
+ EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U);
+ }
+
+ private static void EmitSaturatingAddSubBinaryOp(ArmEmitterContext context, bool add, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ EmitVectorBinaryOpI32(context, (ne, me) =>
+ {
+ if (op.Size <= 2)
+ {
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ Operand res = add ? context.Add(ne, me) : context.Subtract(ne, me);
+
+ res = EmitSatQ(context, res, 8 << op.Size, signedSrc: true, signed);
+
+ if (op.Size == 2)
+ {
+ res = context.ConvertI64ToI32(res);
+ }
+
+ return res;
+ }
+ else if (add) /* if (op.Size == 3) */
+ {
+ return signed
+ ? EmitBinarySignedSatQAdd(context, ne, me)
+ : EmitBinaryUnsignedSatQAdd(context, ne, me);
+ }
+ else /* if (sub) */
+ {
+ return signed
+ ? EmitBinarySignedSatQSub(context, ne, me)
+ : EmitBinaryUnsignedSatQSub(context, ne, me);
+ }
+ }, signed);
+ }
+
+ private static void EmitSse41MaxMinNumOpF32(ArmEmitterContext context, bool isMaxNum, bool scalar)
+ {
+ IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
+
+ Operand genericEmit(Operand n, Operand m)
+ {
+ Operand nNum = context.Copy(n);
+ Operand mNum = context.Copy(m);
+
+ InstEmit.EmitSse2VectorIsNaNOpF(context, nNum, out Operand nQNaNMask, out _, isQNaN: true);
+ InstEmit.EmitSse2VectorIsNaNOpF(context, mNum, out Operand mQNaNMask, out _, isQNaN: true);
+
+ int sizeF = op.Size & 1;
+
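+ // In both branches below, nMask/mMask select lanes where exactly one operand is
+ // a quiet NaN; that operand is replaced with negInfMask (negative infinity for
+ // max, positive for min) so the max/min returns the numeric operand, matching
+ // the FMAXNM/FMINNM rule.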
+ if (sizeF == 0)
+ {
+ Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
+
+ nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask);
+ mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask);
+
+ return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
+
+ Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
+
+ nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask);
+ mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask);
+
+ return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum);
+ }
+ }
+
+ if (scalar)
+ {
+ EmitScalarBinaryOpSimd32(context, genericEmit);
+ }
+ else
+ {
+ EmitVectorBinaryOpSimd32(context, genericEmit);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs
new file mode 100644
index 0000000..aab6778
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs
@@ -0,0 +1,798 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
+ public static void Cmeq_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: true);
+ }
+
+ public static void Cmeq_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse41)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero();
+ }
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmge_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: true);
+ }
+
+ public static void Cmge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero();
+ }
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, m, n);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmgt_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: true);
+ }
+
+ public static void Cmgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero();
+ }
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmhi_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: true);
+ }
+
+ public static void Cmhi_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 3)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
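+ // Unsigned n > m without an unsigned compare: max(m, n) == m holds iff m >= n,
+ // so inverting that equality (Pandn with all ones) gives the n > m mask.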
+ Intrinsic maxInst = X86PmaxuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, m, n);
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];
+
+ res = context.AddIntrinsic(cmpInst, res, m);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmhs_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: true);
+ }
+
+ public static void Cmhs_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 3)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
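+ // Unsigned n >= m: max(n, m) == n holds exactly when n >= m, so the Pcmpeq
+ // result is already the final mask.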
+ Intrinsic maxInst = X86PmaxuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, n, m);
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];
+
+ res = context.AddIntrinsic(cmpInst, res, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmle_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: true);
+ }
+
+ public static void Cmle_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, n, context.VectorZero());
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmlt_S(ArmEmitterContext context)
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: true);
+ }
+
+ public static void Cmlt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse42)
+ {
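+ // CMLT #0 (n < 0 signed) maps directly to PCMPGT with swapped operands (0 > n).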
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, context.VectorZero(), n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: false);
+ }
+ }
+
+ public static void Cmtst_S(ArmEmitterContext context)
+ {
+ EmitCmtstOp(context, scalar: true);
+ }
+
+ public static void Cmtst_V(ArmEmitterContext context)
+ {
+ EmitCmtstOp(context, scalar: false);
+ }
+
+ public static void Facge_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: true, absolute: true);
+ }
+ }
+
+ public static void Facge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: false, absolute: true);
+ }
+ }
+
+ public static void Facgt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: true, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: true, absolute: true);
+ }
+ }
+
+ public static void Facgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: false, absolute: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: false, absolute: true);
+ }
+ }
+
+ public static void Fccmp_S(ArmEmitterContext context)
+ {
+ EmitFccmpOrFccmpe(context, signalNaNs: false);
+ }
+
+ public static void Fccmpe_S(ArmEmitterContext context)
+ {
+ EmitFccmpOrFccmpe(context, signalNaNs: true);
+ }
+
+ public static void Fcmeq_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.Equal, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: true);
+ }
+ }
+
+ public static void Fcmeq_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.Equal, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: false);
+ }
+ }
+
+ public static void Fcmge_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: true);
+ }
+ }
+
+ public static void Fcmge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: false);
+ }
+ }
+
+ public static void Fcmgt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: true);
+ }
+ }
+
+ public static void Fcmgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: false);
+ }
+ }
+
+ public static void Fcmle_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThanOrEqual, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: true);
+ }
+ }
+
+ public static void Fcmle_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThanOrEqual, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: false);
+ }
+ }
+
+ public static void Fcmlt_S(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThan, scalar: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: true);
+ }
+ }
+
+ public static void Fcmlt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThan, scalar: false);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: false);
+ }
+ }
+
+ public static void Fcmp_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: false);
+ }
+ else
+ {
+ EmitFcmpOrFcmpe(context, signalNaNs: false);
+ }
+ }
+
+ public static void Fcmpe_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: true);
+ }
+ else
+ {
+ EmitFcmpOrFcmpe(context, signalNaNs: true);
+ }
+ }
+
+ private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
+
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond));
+
+ EmitSetNzcv(context, op.Nzcv);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+
+ EmitFcmpOrFcmpe(context, signalNaNs);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context, int nzcv)
+ {
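+ // The immediate packs the flags as N:Z:C:V, from bit 3 down to bit 0.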
+ static Operand Extract(int value, int bit)
+ {
+ if (bit != 0)
+ {
+ value >>= bit;
+ }
+
+ value &= 1;
+
+ return Const(value);
+ }
+
+ SetFlag(context, PState.VFlag, Extract(nzcv, 0));
+ SetFlag(context, PState.CFlag, Extract(nzcv, 1));
+ SetFlag(context, PState.ZFlag, Extract(nzcv, 2));
+ SetFlag(context, PState.NFlag, Extract(nzcv, 3));
+ }
+
+ private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ bool cmpWithZero = op is not OpCodeSimdFcond && op.Bit3;
+
+ if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);
+
+ CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
+
+ Operand lblNaN = Label();
+ Operand lblEnd = Label();
+
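+ // The ordered compare yields an all-ones mask when neither input is NaN;
+ // a zero mask branches to the NaN path, which sets NZCV to 0011.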
+ if (op.Size == 0)
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand nCopy = context.Copy(n);
+ Operand mCopy = cmpWithZero ? context.VectorZero() : context.Copy(m);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, nCopy, mCopy);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, nCopy, mCopy);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, nCopy, mCopy);
+
+ SetFlag(context, PState.VFlag, Const(0));
+ SetFlag(context, PState.CFlag, cf);
+ SetFlag(context, PState.ZFlag, zf);
+ SetFlag(context, PState.NFlag, nf);
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand nCopy = context.Copy(n);
+ Operand mCopy = cmpWithZero ? context.VectorZero() : context.Copy(m);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, nCopy, mCopy);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, nCopy, mCopy);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, nCopy, mCopy);
+
+ SetFlag(context, PState.VFlag, Const(0));
+ SetFlag(context, PState.CFlag, cf);
+ SetFlag(context, PState.ZFlag, zf);
+ SetFlag(context, PState.NFlag, nf);
+ }
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNaN);
+
+ SetFlag(context, PState.VFlag, Const(1));
+ SetFlag(context, PState.CFlag, Const(1));
+ SetFlag(context, PState.ZFlag, Const(0));
+ SetFlag(context, PState.NFlag, Const(0));
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand me;
+
+ if (cmpWithZero)
+ {
+ me = op.Size == 0 ? ConstF(0f) : ConstF(0d);
+ }
+ else
+ {
+ me = context.VectorExtract(type, GetVec(op.Rm), 0);
+ }
+
+ Operand nzcv = EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare), ne, me, Const(signalNaNs));
+
+ EmitSetNzcv(context, nzcv);
+ }
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context, Operand nzcv)
+ {
+ Operand Extract(Operand value, int bit)
+ {
+ if (bit != 0)
+ {
+ value = context.ShiftRightUI(value, Const(bit));
+ }
+
+ value = context.BitwiseAnd(value, Const(1));
+
+ return value;
+ }
+
+ SetFlag(context, PState.VFlag, Extract(nzcv, 0));
+ SetFlag(context, PState.CFlag, Extract(nzcv, 1));
+ SetFlag(context, PState.ZFlag, Extract(nzcv, 2));
+ SetFlag(context, PState.NFlag, Extract(nzcv, 3));
+ }
+
+ private static void EmitCmpOp(ArmEmitterContext context, Func2I emitCmp, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
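+ // All-ones mask sized to the element width (8 << Size bits).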
+ ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size));
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ me = EmitVectorExtractSx(context, binOp.Rm, index, op.Size);
+ }
+ else
+ {
+ me = Const(0L);
+ }
+
+ Operand isTrue = emitCmp(ne, me);
+
+ Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L));
+
+ res = EmitVectorInsert(context, res, mask, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitCmtstOp(ArmEmitterContext context, bool scalar)
+ {
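+ // CMTST sets each element to all-ones when (n & m) is nonzero, and to zero otherwise.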
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size));
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ Operand test = context.BitwiseAnd(ne, me);
+
+ Operand isTrue = context.ICompareNotEqual(test, Const(0L));
+
+ Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L));
+
+ res = EmitVectorInsert(context, res, mask, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitCmpOpF(ArmEmitterContext context, string name, bool scalar, bool absolute = false)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = !scalar ? op.GetBytesCount() >> (sizeF + 2) : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ me = context.VectorExtract(type, GetVec(binOp.Rm), index);
+ }
+ else
+ {
+ me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
+ }
+
+ if (absolute)
+ {
+ ne = EmitUnaryMathCall(context, nameof(Math.Abs), ne);
+ me = EmitUnaryMathCall(context, nameof(Math.Abs), me);
+ }
+
+ Operand e = EmitSoftFloatCall(context, name, ne, me);
+
+ res = context.VectorInsert(res, e, index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitSse2OrAvxCmpOpF(ArmEmitterContext context, CmpCondition cond, bool scalar, bool absolute = false)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = op is OpCodeSimdReg binOp ? GetVec(binOp.Rm) : context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ if (absolute)
+ {
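+ // FACGE/FACGT compare absolute values: clear the sign bit of every lane
+ // by ANDing with 0x7FFFFFFF.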
+ Operand mask = scalar ? X86GetScalar(context, int.MaxValue) : X86GetAllElements(context, int.MaxValue);
+
+ n = context.AddIntrinsic(Intrinsic.X86Andps, n, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Andps, m, mask);
+ }
+
+ Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
+
+ Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ if (absolute)
+ {
+ Operand mask = scalar ? X86GetScalar(context, long.MaxValue) : X86GetAllElements(context, long.MaxValue);
+
+ n = context.AddIntrinsic(Intrinsic.X86Andpd, n, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Andpd, m, mask);
+ }
+
+ Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
+
+ Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
new file mode 100644
index 0000000..1d68bce
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
@@ -0,0 +1,437 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit32
+ {
+ public static void Vceq_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, false);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareEQFpscr), false);
+ }
+ }
+
+ public static void Vceq_I(ArmEmitterContext context)
+ {
+ EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, false, false);
+ }
+
+ public static void Vceq_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareEQFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, true, false);
+ }
+ }
+
+ public static void Vcge_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGEFpscr), false);
+ }
+ }
+
+ public static void Vcge_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, false, !op.U);
+ }
+
+ public static void Vcge_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGEFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, true, true);
+ }
+ }
+
+ public static void Vcgt_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, false);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGTFpscr), false);
+ }
+ }
+
+ public static void Vcgt_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, false, !op.U);
+ }
+
+ public static void Vcgt_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseAvx)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGTFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, true, true);
+ }
+ }
+
+ public static void Vcle_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareLEFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareLessOrEqual, context.ICompareLessOrEqualUI, true, true);
+ }
+ }
+
+ public static void Vclt_Z(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.F)
+ {
+ if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThan, true);
+ }
+ else if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThan, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareLTFpscr), true);
+ }
+ }
+ else
+ {
+ EmitCmpOpI32(context, context.ICompareLess, context.ICompareLessUI, true, true);
+ }
+ }
+
+ private static void EmitCmpOpF32(ArmEmitterContext context, string name, bool zero)
+ {
+ if (zero)
+ {
+ EmitVectorUnaryOpF32(context, (m) =>
+ {
+ Operand zeroOp = m.Type == OperandType.FP64 ? ConstF(0.0d) : ConstF(0.0f);
+
+ return EmitSoftFloatCallDefaultFpscr(context, name, m, zeroOp);
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpF32(context, (n, m) =>
+ {
+ return EmitSoftFloatCallDefaultFpscr(context, name, n, m);
+ });
+ }
+ }
+
+ private static Operand ZerosOrOnes(ArmEmitterContext context, Operand fromBool, OperandType baseType)
+ {
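+ // Widens a boolean comparison result into an all-ones or all-zeros element
+ // of the given type.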
+ Operand ones = (baseType == OperandType.I64) ? Const(-1L) : Const(-1);
+
+ return context.ConditionalSelect(fromBool, ones, Const(baseType, 0L));
+ }
+
+ private static void EmitCmpOpI32(
+ ArmEmitterContext context,
+ Func2I signedOp,
+ Func2I unsignedOp,
+ bool zero,
+ bool signed)
+ {
+ if (zero)
+ {
+ if (signed)
+ {
+ EmitVectorUnaryOpSx32(context, (m) =>
+ {
+ OperandType type = m.Type;
+ Operand zeroV = (type == OperandType.I64) ? Const(0L) : Const(0);
+
+ return ZerosOrOnes(context, signedOp(m, zeroV), type);
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpZx32(context, (m) =>
+ {
+ OperandType type = m.Type;
+ Operand zeroV = (type == OperandType.I64) ? Const(0L) : Const(0);
+
+ return ZerosOrOnes(context, unsignedOp(m, zeroV), type);
+ });
+ }
+ }
+ else
+ {
+ if (signed)
+ {
+ EmitVectorBinaryOpSx32(context, (n, m) => ZerosOrOnes(context, signedOp(n, m), n.Type));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (n, m) => ZerosOrOnes(context, unsignedOp(n, m), n.Type));
+ }
+ }
+ }
+
+ public static void Vcmp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, false);
+ }
+ else
+ {
+ EmitVcmpOrVcmpe(context, false);
+ }
+ }
+
+ public static void Vcmpe(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, true);
+ }
+ else
+ {
+ EmitVcmpOrVcmpe(context, true);
+ }
+ }
+
+ private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
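+ // Bit 1 of Opc selects the compare-with-zero encoding (VCMP{E} Sd/Dd, #0.0).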
+ bool cmpWithZero = (op.Opc & 2) != 0;
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
+ {
+ CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
+
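+ // AArch32 scalar registers alias the Q vectors (four S or two D per Q),
+ // so the Q index is the scalar register number shifted right.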
+ bool doubleSize = sizeF != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand n = GetVecA32(op.Vd >> shift);
+
+ n = EmitSwapScalar(context, n, op.Vd, doubleSize);
+ m = cmpWithZero ? context.VectorZero() : EmitSwapScalar(context, m, op.Vm, doubleSize);
+
+ Operand lblNaN = Label();
+ Operand lblEnd = Label();
+
+ if (!doubleSize)
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);
+
+ SetFpFlag(context, FPState.VFlag, Const(0));
+ SetFpFlag(context, FPState.CFlag, cf);
+ SetFpFlag(context, FPState.ZFlag, zf);
+ SetFpFlag(context, FPState.NFlag, nf);
+ }
+ else
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);
+
+ SetFpFlag(context, FPState.VFlag, Const(0));
+ SetFpFlag(context, FPState.CFlag, cf);
+ SetFpFlag(context, FPState.ZFlag, zf);
+ SetFpFlag(context, FPState.NFlag, nf);
+ }
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNaN);
+
+ SetFpFlag(context, FPState.VFlag, Const(1));
+ SetFpFlag(context, FPState.CFlag, Const(1));
+ SetFpFlag(context, FPState.ZFlag, Const(0));
+ SetFpFlag(context, FPState.NFlag, Const(0));
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand ne = ExtractScalar(context, type, op.Vd);
+ Operand me;
+
+ if (cmpWithZero)
+ {
+ me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
+ }
+ else
+ {
+ me = ExtractScalar(context, type, op.Vm);
+ }
+
+ Operand nzcv = EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare), ne, me, Const(signalNaNs));
+
+ EmitSetFpscrNzcv(context, nzcv);
+ }
+ }
+
+ private static void EmitSetFpscrNzcv(ArmEmitterContext context, Operand nzcv)
+ {
+ Operand Extract(Operand value, int bit)
+ {
+ if (bit != 0)
+ {
+ value = context.ShiftRightUI(value, Const(bit));
+ }
+
+ value = context.BitwiseAnd(value, Const(1));
+
+ return value;
+ }
+
+ SetFpFlag(context, FPState.VFlag, Extract(nzcv, 0));
+ SetFpFlag(context, FPState.CFlag, Extract(nzcv, 1));
+ SetFpFlag(context, FPState.ZFlag, Extract(nzcv, 2));
+ SetFpFlag(context, FPState.NFlag, Extract(nzcv, 3));
+ }
+
+ private static void EmitSse2OrAvxCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+ Intrinsic inst = (sizeF == 0) ? Intrinsic.X86Cmpps : Intrinsic.X86Cmppd;
+
+ if (zero)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(inst, m, context.VectorZero(), Const((int)cond));
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ return context.AddIntrinsic(inst, n, m, Const((int)cond));
+ });
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs b/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
new file mode 100644
index 0000000..6226e35
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
@@ -0,0 +1,115 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Aesd_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseArm64Aes)
+ {
+ res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n);
+ }
+ else if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aese_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseArm64Aes)
+ {
+ res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n);
+ }
+ else if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Encrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aesimc_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseArm64Aes)
+ {
+ res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n);
+ }
+ else if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns)), n);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Aesmc_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res;
+
+ if (Optimizations.UseArm64Aes)
+ {
+ res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n);
+ }
+ else if (Optimizations.UseAesni)
+ {
+ Operand roundKey = context.VectorZero();
+
+ // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens.
+ res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
+
+ // Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens.
+ res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.MixColumns)), n);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs
new file mode 100644
index 0000000..7a0c981
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs
@@ -0,0 +1,115 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ partial class InstEmit32
+ {
+ public static void Aesd_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseArm64Aes)
+ {
+ res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n);
+ }
+ else if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aese_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseArm64Aes)
+ {
+ res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n);
+ }
+ else if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Encrypt)), d, n);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void Aesimc_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseArm64Aes)
+ {
+ res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n);
+ }
+ else if (Optimizations.UseAesni)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns)), n);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Aesmc_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qm);
+
+ Operand res;
+
+ if (Optimizations.UseArm64Aes)
+ {
+ res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n);
+ }
+ else if (Optimizations.UseAesni)
+ {
+ Operand roundKey = context.VectorZero();
+
+ // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens.
+ res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);
+
+ // Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens.
+ res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
+ }
+ else
+ {
+ res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.MixColumns)), n);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs
new file mode 100644
index 0000000..3363a7c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs
@@ -0,0 +1,1890 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+
+ static partial class InstEmit
+ {
+ public static void Fcvt_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (op.Size == 0 && op.Opc == 1) // Single -> Double.
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+ Operand res = context.ConvertToFP(OperandType.FP64, ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else if (op.Size == 1 && op.Opc == 0) // Double -> Single.
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
+
+ Operand res = context.ConvertToFP(OperandType.FP32, ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else if (op.Size == 0 && op.Opc == 3) // Single -> Half.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ res = context.AddIntrinsic(Intrinsic.X86Pslldq, res, Const(14)); // VectorZeroUpper112()
+ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(14));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
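+ // The soft-float helper may access guest state (e.g. FPCR), so spill
+ // registers to the context around the managed call.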
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = context.ZeroExtend16(OperandType.I64, res);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
+ }
+ }
+ else if (op.Size == 3 && op.Opc == 0) // Half -> Single.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn));
+ res = context.VectorZeroUpper96(res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
+
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
+ context.LoadFromContext();
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
+
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = context.ZeroExtend16(OperandType.I64, res);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
+ }
+ }
+ else if (op.Size == 3 && op.Opc == 1) // Half -> Double.
+ {
+ if (Optimizations.UseF16c)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, n);
+ res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
+ res = context.VectorZeroUpper64(res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
+
+ context.StoreToContext();
+ Operand res = context.Call(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)), ne);
+ context.LoadFromContext();
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else // Invalid encoding.
+ {
+ Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}");
+ }
+ }
+
+ public static void Fcvtas_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtasGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
+ }
+ }
+
+ public static void Fcvtas_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtasS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true);
+ }
+ }
+
+ public static void Fcvtas_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtasV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtau_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtauGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
+ }
+ }
+
+ public static void Fcvtau_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtauS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true);
+ }
+ }
+
+ public static void Fcvtau_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtauV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false);
+ }
+ }
+
+ public static void Fcvtl_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtlV);
+ }
+ else if (Optimizations.UseSse2 && sizeF == 1)
+ {
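+ // The 128-bit form (FCVTL2) sources the upper half of the input; MOVHLPS
+ // moves it into the low lanes before widening with CVTPS2PD.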
+ Operand n = GetVec(op.Rn);
+
+ Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
+ res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseF16c && sizeF == 0)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand n = GetVec(op.Rn);
+
+ Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = 4 >> sizeF;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ if (sizeF == 0)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1);
+
+ context.StoreToContext();
+ Operand e = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = context.VectorInsert(res, e, index);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index);
+
+ Operand e = context.ConvertToFP(OperandType.FP64, ne);
+
+ res = context.VectorInsert(res, e, index);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fcvtms_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
+ }
+ }
+
+ public static void Fcvtms_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtmsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1), signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtmu_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmuGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
+ }
+ }
+
+ public static void Fcvtn_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOpFRd(context, Intrinsic.Arm64FcvtnV);
+ }
+ else if (Optimizations.UseSse2 && sizeF == 1)
+ {
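+ // FCVTN writes the low half of Rd (zeroing the top), while FCVTN2 fills the
+ // top half and preserves the bottom; MOVHLPS/MOVLHPS place the result accordingly.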
+ Operand d = GetVec(op.Rd);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, GetVec(op.Rn));
+ nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
+
+ Operand res = context.VectorZeroUpper64(d);
+ res = context.AddIntrinsic(movInst, res, nInt);
+
+ context.Copy(d, res);
+ }
+ else if (Optimizations.UseF16c && sizeF == 0)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
+
+ Operand res = context.VectorZeroUpper64(d);
+ res = context.AddIntrinsic(movInst, res, nInt);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ int elems = 4 >> sizeF;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+
+ if (sizeF == 0)
+ {
+ context.StoreToContext();
+ Operand e = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
+ context.LoadFromContext();
+
+ res = EmitVectorInsert(context, res, e, part + index, 1);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand e = context.ConvertToFP(OperandType.FP32, ne);
+
+ res = context.VectorInsert(res, e, part + index);
+ }
+ }
+
+ context.Copy(d, res);
+ }
+ }
+
+ public static void Fcvtns_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtnsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1));
+ }
+ }
+
+ public static void Fcvtns_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnsS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: true);
+ }
+ }
+
+ public static void Fcvtns_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtnu_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnuS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: true);
+ }
+ }
+
+ public static void Fcvtnu_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnuV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false);
+ }
+ else
+ {
+ EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: false);
+ }
+ }
+
+ public static void Fcvtps_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
+ }
+ }
+
+ public static void Fcvtpu_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpuGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
+ }
+ }
+
+ public static void Fcvtzs_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzsGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_s_Gp(context, (op1) => op1);
+ }
+ }
+
+ public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzsGpFixed, op.FBits);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
+ }
+ else
+ {
+ EmitFcvtzs_Gp_Fixed(context);
+ }
+ }
+
+ public static void Fcvtzs_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzsS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: true);
+ }
+ }
+
+ public static void Fcvtzs_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzsVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Fcvtzu_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzuGp);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
+ }
+ else
+ {
+ EmitFcvt_u_Gp(context, (op1) => op1);
+ }
+ }
+
+ public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzuGpFixed, op.FBits);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
+ }
+ else
+ {
+ EmitFcvtzu_Gp_Fixed(context);
+ }
+ }
+
+ public static void Fcvtzu_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzuS);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: true);
+ }
+ }
+
+ public static void Fcvtzu_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzuV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: false);
+ }
+ }
+
+ public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzuVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+ else
+ {
+ EmitFcvtz(context, signed: false, scalar: false);
+ }
+ }
+
+ public static void Scvtf_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64ScvtfGp);
+ }
+ else
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ res = context.SignExtend32(OperandType.I64, res);
+ }
+
+ res = EmitFPConvert(context, res, op.Size, signed: true);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64ScvtfGpFixed, op.FBits);
+ }
+ else
+ {
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ res = context.SignExtend32(OperandType.I64, res);
+ }
+
+ res = EmitFPConvert(context, res, op.Size, signed: true);
+
+ res = EmitI2fFBitsMul(context, res, op.FBits);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Scvtf_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64ScvtfS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: true);
+ }
+ }
+
+ public static void Scvtf_S_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64ScvtfSFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: true);
+ }
+ }
+
+ public static void Scvtf_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64ScvtfV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Scvtf_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64ScvtfVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2ScvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: true, scalar: false);
+ }
+ }
+
+ public static void Ucvtf_Gp(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64UcvtfGp);
+ }
+ else
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = EmitFPConvert(context, res, op.Size, signed: false);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64UcvtfGpFixed, op.FBits);
+ }
+ else
+ {
+ Operand res = GetIntOrZR(context, op.Rn);
+
+ res = EmitFPConvert(context, res, op.Size, signed: false);
+
+ res = EmitI2fFBitsMul(context, res, op.FBits);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+
+ public static void Ucvtf_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64UcvtfS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: true);
+ }
+ }
+
+ public static void Ucvtf_S_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64UcvtfSFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: true);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: true);
+ }
+ }
+
+ public static void Ucvtf_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64UcvtfV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: false);
+ }
+ }
+
+ public static void Ucvtf_V_Fixed(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64UcvtfVFixed, GetFBits(context));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitSse2UcvtfOp(context, scalar: false);
+ }
+ else
+ {
+ EmitCvtf(context, signed: false, scalar: false);
+ }
+ }
+
+ private static void EmitFcvt(ArmEmitterContext context, Func1I emit, bool signed, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand n = GetVec(op.Rn);
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
+
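+ // Conversions saturate to the integer range rather than wrapping, matching
+ // the AArch64 FCVT* semantics.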
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, n, index);
+
+ Operand e = emit(ne);
+
+ if (sizeF == 0)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
+
+ e = context.Call(info, e);
+
+ e = context.ZeroExtend32(OperandType.I64, e);
+ }
+ else /* if (sizeF == 1) */
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
+
+ e = context.Call(info, e);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, sizeI);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand n = GetVec(op.Rn);
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ int fBits = GetFBits(context);
+
+ int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, n, index);
+
+ Operand e = EmitF2iFBitsMul(context, ne, fBits);
+
+ if (sizeF == 0)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
+
+ e = context.Call(info, e);
+
+ e = context.ZeroExtend32(OperandType.I64, e);
+ }
+ else /* if (sizeF == 1) */
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
+
+ e = context.Call(info, e);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, sizeI);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit)
+ {
+ EmitFcvt___Gp(context, emit, signed: true);
+ }
+
+ private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit)
+ {
+ EmitFcvt___Gp(context, emit, signed: false);
+ }
+
+ private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ Operand res = signed
+ ? EmitScalarFcvts(context, emit(ne), 0)
+ : EmitScalarFcvtu(context, emit(ne), 0);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context)
+ {
+ EmitFcvtz__Gp_Fixed(context, signed: true);
+ }
+
+ private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context)
+ {
+ EmitFcvtz__Gp_Fixed(context, signed: false);
+ }
+
+ private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ Operand res = signed
+ ? EmitScalarFcvts(context, ne, op.FBits)
+ : EmitScalarFcvtu(context, ne, op.FBits);
+
+ SetIntOrZR(context, op.Rd, res);
+ }
+
+ private static void EmitCvtf(ArmEmitterContext context, bool signed, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+ int sizeI = sizeF + 2;
+
+ int fBits = GetFBits(context);
+
+ int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI);
+
+ Operand e = EmitFPConvert(context, ne, sizeF, signed);
+
+ e = EmitI2fFBitsMul(context, e, fBits);
+
+ res = context.VectorInsert(res, e, index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
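+ // For the fixed-point variants the fraction bit count is encoded as a shift
+ // immediate; plain conversions carry no such immediate and report zero bits.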
+ private static int GetFBits(ArmEmitterContext context)
+ {
+ if (context.CurrOp is OpCodeSimdShImm op)
+ {
+ return GetImmShr(op);
+ }
+
+ return 0;
+ }
+
+ private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed)
+ {
+ Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
+ Debug.Assert((uint)size < 2);
+
+ OperandType type = size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ if (signed)
+ {
+ return context.ConvertToFP(type, value);
+ }
+ else
+ {
+ return context.ConvertToFPUI(type, value);
+ }
+ }
+
+ private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ value = EmitF2iFBitsMul(context, value, fBits);
+
+ MethodInfo info;
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
+ }
+ else
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64));
+ }
+
+ return context.Call(info, value);
+ }
+
+ private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ value = EmitF2iFBitsMul(context, value, fBits);
+
+ MethodInfo info;
+
+ if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32));
+ }
+ else
+ {
+ info = value.Type == OperandType.FP32
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
+ }
+
+ return context.Call(info, value);
+ }
+
+ private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+ {
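+ // Fixed-point conversions scale by 2^fBits before the float is rounded to an
+ // integer; the multiplication is exact (apart from overflow, which saturates)
+ // because the scale is a power of two.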
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ if (fBits == 0)
+ {
+ return value;
+ }
+
+ if (value.Type == OperandType.FP32)
+ {
+ return context.Multiply(value, ConstF(MathF.Pow(2f, fBits)));
+ }
+ else /* if (value.Type == OperandType.FP64) */
+ {
+ return context.Multiply(value, ConstF(Math.Pow(2d, fBits)));
+ }
+ }
+
+ private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+ if (fBits == 0)
+ {
+ return value;
+ }
+
+ if (value.Type == OperandType.FP32)
+ {
+ return context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits)));
+ }
+ else /* if (value.Type == OperandType.FP64) */
+ {
+ return context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits)));
+ }
+ }
+
+ public static Operand EmitSse2CvtDoubleToInt64OpF(ArmEmitterContext context, Operand opF, bool scalar)
+ {
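+ // SSE2 has no packed double-to-int64 conversion, so each 64-bit lane goes
+ // through scalar CVTSD2SI and the two results are recombined with MOVLHPS.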
+ Debug.Assert(opF.Type == OperandType.V128);
+
+ Operand longL = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, opF); // opFL
+ Operand res = context.VectorCreateScalar(longL);
+
+ if (!scalar)
+ {
+ Operand opFH = context.AddIntrinsic(Intrinsic.X86Movhlps, res, opF); // res doesn't matter.
+ Operand longH = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, opFH);
+ Operand resH = context.VectorCreateScalar(longH);
+ res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, resH);
+ }
+
+ return res;
+ }
+
+ private static Operand EmitSse2CvtInt64ToDoubleOp(ArmEmitterContext context, Operand op, bool scalar)
+ {
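+ // Likewise there is no packed int64-to-double conversion; each lane is moved
+ // out to a GPR and converted with scalar CVTSI2SD.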
+ Debug.Assert(op.Type == OperandType.V128);
+
+ Operand longL = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, op); // opL
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsi2sd, context.VectorZero(), longL);
+
+ if (!scalar)
+ {
+ Operand opH = context.AddIntrinsic(Intrinsic.X86Movhlps, res, op); // res doesn't matter.
+ Operand longH = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, opH);
+ Operand resH = context.AddIntrinsic(Intrinsic.X86Cvtsi2sd, res, longH); // res doesn't matter.
+ res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, resH);
+ }
+
+ return res;
+ }
+
+ private static void EmitSse2ScvtfOp(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
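+ // Adding or subtracting fBits in the exponent field builds 2^(+/-fBits)
+ // directly, e.g. fBits == 8: 0x3F800000 - 8 * 0x800000 == 0x3B800000 == 1f / 256f.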
+ int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar(context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand res = EmitSse2CvtInt64ToDoubleOp(context, n, scalar);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar(context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitSse2UcvtfOp(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
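+ // CVTDQ2PS only handles signed inputs, so the unsigned value is split into
+ // 16-bit halves: each half fits a float exactly, the high half is scaled by
+ // 2^16 (the 65536.0f mask below) and the parts are summed.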
+ Operand mask = scalar // 65536.000f (1 << 16)
+ ? X86GetScalar(context, 0x47800000)
+ : X86GetAllElements(context, 0x47800000);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
+ res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar(context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else /* if (sizeF == 1) */
+ {
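+ // Same split for 64-bit lanes: the 32-bit halves convert exactly to double
+ // and the high half is scaled by 2^32 before summing.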
+ Operand mask = scalar // 4294967296.0000000d (1L << 32)
+ ? X86GetScalar(context, 0x41F0000000000000L)
+ : X86GetAllElements(context, 0x41F0000000000000L);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Psrlq, n, Const(32));
+ res = EmitSse2CvtInt64ToDoubleOp(context, res, scalar);
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, mask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Psllq, n, Const(32));
+ res2 = context.AddIntrinsic(Intrinsic.X86Psrlq, res2, Const(32));
+ res2 = EmitSse2CvtInt64ToDoubleOp(context, res2, scalar);
+
+ res = context.AddIntrinsic(Intrinsic.X86Addpd, res, res2);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar(context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitSse41FcvtsOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
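+ // NaN lanes are zeroed first (Arm returns 0 for NaN), and overflow is fixed
+ // up afterwards: CVTPS2DQ yields 0x80000000 on overflow, which the
+ // NotLessThan mask XOR turns into 0x7FFFFFFF, the Arm saturated result.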
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar(context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+
+ Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
+ ? X86GetScalar(context, 0x4F000000)
+ : X86GetAllElements(context, 0x4F000000);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper96(dRes);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar(context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
+ Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
+
+ Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
+ ? X86GetScalar(context, 0x43E0000000000000L)
+ : X86GetAllElements(context, 0x43E0000000000000L);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ }
+
+ private static void EmitSse41FcvtuOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ // sizeF == ((OpCodeSimdShImm)op).Size - 2
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
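+ // Unsigned saturation is synthesized from the signed converter: negatives
+ // are clamped to zero, both x and x - 2^31 are converted, and the results
+ // are recombined so lanes >= 2^31 get the offset added back and lanes
+ // >= 2^32 saturate to 0xFFFFFFFF via the XOR mask plus the final add.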
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+ int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar(context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
+ ? X86GetScalar(context, 0x4F000000)
+ : X86GetAllElements(context, 0x4F000000);
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
+ dRes = context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper96(dRes);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (op is OpCodeSimdShImm fixedOp)
+ {
+ int fBits = GetImmShr(fixedOp);
+
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+ long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+ Operand fpScaledMask = scalar
+ ? X86GetScalar(context, fpScaled)
+ : X86GetAllElements(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
+ ? X86GetScalar(context, 0x43E0000000000000L)
+ : X86GetAllElements(context, 0x43E0000000000000L);
+
+ Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nLong2 = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
+ dRes = context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
+
+ if (scalar)
+ {
+ dRes = context.VectorZeroUpper64(dRes);
+ }
+
+ context.Copy(GetVec(op.Rd), dRes);
+ }
+ }
+
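+ // The scalar-to-GP variants below reuse the same NaN-zeroing and saturation
+ // masking as the vector forms, with the FP max constant chosen by the
+ // destination register size.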
+ private static void EmitSse41Fcvts_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size == 0)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
+ int fpScaled = 0x3F800000 + op.FBits * 0x800000;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
+
+ int fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x4F000000 // 2.14748365E9f (2147483648)
+ : 0x5F000000; // 9.223372E18f (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int64)
+ {
+ nInt = context.SignExtend32(OperandType.I64, nInt);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
+ long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
+
+ long fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
+ : 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ nLong = context.ConvertI64ToI32(nLong);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ }
+
+ private static void EmitSse41Fcvtu_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size == 0)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
+ int fpScaled = 0x3F800000 + op.FBits * 0x800000;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ int fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x4F000000 // 2.14748365E9f (2147483648)
+ : 0x5F000000; // 9.223372E18f (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int64)
+ {
+ nInt = context.SignExtend32(OperandType.I64, nInt);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
+ dRes = context.Add(dRes, nIntOrLong);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (isFixed)
+ {
+ // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
+ long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
+
+ Operand fpScaledMask = X86GetScalar(context, fpScaled);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
+ }
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ long fpMaxVal = op.RegisterSize == RegisterSize.Int32
+ ? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
+ : 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes)
+ : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
+
+ if (op.RegisterSize == RegisterSize.Int32)
+ {
+ nLong = context.ConvertI64ToI32(nLong);
+ }
+
+ Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
+ dRes = context.Add(dRes, nIntOrLong);
+
+ SetIntOrZR(context, op.Rd, dRes);
+ }
+ }
+
+ private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size)
+ {
+ OperandType type = size == 3 ? OperandType.I64 : OperandType.I32;
+
+ return context.VectorExtract(type, GetVec(reg), index);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
new file mode 100644
index 0000000..8eef6b1
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@@ -0,0 +1,874 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ private static int FlipVdBits(int vd, bool lowBit)
+ {
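+ // The A32 encoding packs the fifth register index bit at opposite ends for
+ // single and double precision, so converting between the two views rotates
+ // that bit through the 5-bit index.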
+ if (lowBit)
+ {
+ // Move the low bit to the top.
+ return ((vd & 0x1) << 4) | (vd >> 1);
+ }
+ else
+ {
+ // Move the high bit to the bottom.
+ return ((vd & 0xf) << 1) | (vd >> 4);
+ }
+ }
+
+ private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned)
+ {
+ MethodInfo info;
+
+ if (op1.Type == OperandType.FP64)
+ {
+ info = unsigned
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
+ }
+ else
+ {
+ info = unsigned
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32));
+ }
+
+ return context.Call(info, op1);
+ }
+
+ public static void Vcvt_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ bool unsigned = (op.Opc & 1) != 0;
+ bool toInteger = (op.Opc & 2) != 0;
+ OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64;
+
+ if (toInteger)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) =>
+ {
+ return EmitSaturateFloatToInt(context, op1, unsigned);
+ });
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (n) =>
+ {
+ if (unsigned)
+ {
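+ // CVTDQ2PS is signed-only; split the unsigned input into 16-bit halves,
+ // convert both exactly, scale the high half by 2^16 and sum (the same
+ // trick as the A64 UCVTF fast path).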
+ Operand mask = X86GetAllElements(context, 0x47800000);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
+ res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
+
+ return context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
+ }
+ });
+ }
+ else
+ {
+ if (unsigned)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
+ }
+ }
+ }
+ }
+
+ public static void Vcvt_V_Fixed(ArmEmitterContext context)
+ {
+ OpCode32SimdCvtFFixed op = (OpCode32SimdCvtFFixed)context.CurrOp;
+
+ bool toFixed = op.Opc == 1;
+ int fracBits = op.Fbits;
+ bool unsigned = op.U;
+
+ if (toFixed) // F32 to S32 or U32 (fixed)
+ {
+ EmitVectorUnaryOpF32(context, (op1) =>
+ {
+ Operand scaledValue = context.Multiply(op1, ConstF(MathF.Pow(2f, fracBits)));
+ MethodInfo info = unsigned
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32));
+
+ return context.Call(info, scaledValue);
+ });
+ }
+ else // S32 or U32 (fixed) to F32
+ {
+ EmitVectorUnaryOpI32(context, (op1) =>
+ {
+ Operand floatValue = unsigned
+ ? context.ConvertToFPUI(OperandType.FP32, op1)
+ : context.ConvertToFP(OperandType.FP32, op1);
+
+ return context.Multiply(floatValue, ConstF(1f / MathF.Pow(2f, fracBits)));
+ }, !unsigned);
+ }
+ }
+
+ public static void Vcvt_FD(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ int vm = op.Vm;
+ int vd;
+ if (op.Size == 3)
+ {
+ vd = FlipVdBits(op.Vd, false);
+ // Double to single.
+ Operand fp = ExtractScalar(context, OperandType.FP64, vm);
+
+ Operand res = context.ConvertToFP(OperandType.FP32, fp);
+
+ InsertScalar(context, vd, res);
+ }
+ else
+ {
+ vd = FlipVdBits(op.Vd, true);
+ // Single to double.
+ Operand fp = ExtractScalar(context, OperandType.FP32, vm);
+
+ Operand res = context.ConvertToFP(OperandType.FP64, fp);
+
+ InsertScalar(context, vd, res);
+ }
+ }
+
+ // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point).
+ public static void Vcvt_FI(ArmEmitterContext context)
+ {
+ OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+
+ bool toInteger = (op.Opc2 & 0b100) != 0;
+
+ OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
+
+ if (toInteger)
+ {
+ bool unsigned = (op.Opc2 & 1) == 0;
+ bool roundWithFpscr = op.Opc != 1;
+
+ if (!roundWithFpscr && Optimizations.UseAdvSimd)
+ {
+ bool doubleSize = floatSize == OperandType.FP64;
+
+ if (doubleSize)
+ {
+ Operand m = GetVecA32(op.Vm >> 1);
+
+ Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, true);
+
+ Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble;
+
+ Operand asInteger = context.AddIntrinsicInt(inst, toConvert);
+
+ InsertScalar(context, op.Vd, asInteger);
+ }
+ else
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS, false);
+ }
+ }
+ else if (!roundWithFpscr && Optimizations.UseSse41)
+ {
+ EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
+ }
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+
+ // TODO: Fast Path.
+ if (roundWithFpscr)
+ {
+ toConvert = EmitRoundByRMode(context, toConvert);
+ }
+
+ // Round towards zero.
+ Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
+
+ InsertScalar(context, op.Vd, asInteger);
+ }
+ }
+ else
+ {
+ bool unsigned = op.Opc == 0;
+
+ Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm);
+
+ Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned);
+
+ InsertScalar(context, op.Vd, asFloat);
+ }
+ }
+
+ private static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
+ {
+ IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
+
+ string name = nameof(Math.Round);
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
+ : typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });
+
+ return context.Call(info, n, Const((int)roundMode));
+ }
+
+ private static FPRoundingMode RMToRoundMode(int rm)
+ {
+ return rm switch
+ {
+ 0b00 => FPRoundingMode.ToNearestAway,
+ 0b01 => FPRoundingMode.ToNearest,
+ 0b10 => FPRoundingMode.TowardsPlusInfinity,
+ 0b11 => FPRoundingMode.TowardsMinusInfinity,
+ _ => throw new ArgumentOutOfRangeException(nameof(rm)),
+ };
+ }
+
+ // VCVTA/M/N/P (floating-point).
+ public static void Vcvt_RM(ArmEmitterContext context)
+ {
+ OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1).
+
+ OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
+
+ bool unsigned = op.Opc == 0;
+ int rm = op.Opc2 & 3;
+
+ Intrinsic inst;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ bool doubleSize = floatSize == OperandType.FP64;
+
+ if (doubleSize)
+ {
+ Operand m = GetVecA32(op.Vm >> 1);
+
+ Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, true);
+
+ if (unsigned)
+ {
+ inst = rm switch
+ {
+ 0b00 => Intrinsic.Arm64FcvtauGp,
+ 0b01 => Intrinsic.Arm64FcvtnuGp,
+ 0b10 => Intrinsic.Arm64FcvtpuGp,
+ 0b11 => Intrinsic.Arm64FcvtmuGp,
+ _ => throw new InvalidOperationException($"{nameof(rm)} contains an invalid value: {rm}"),
+ };
+ }
+ else
+ {
+ inst = rm switch
+ {
+ 0b00 => Intrinsic.Arm64FcvtasGp,
+ 0b01 => Intrinsic.Arm64FcvtnsGp,
+ 0b10 => Intrinsic.Arm64FcvtpsGp,
+ 0b11 => Intrinsic.Arm64FcvtmsGp,
+ _ => throw new InvalidOperationException($"{nameof(rm)} contains an invalid value: {rm}"),
+ };
+ }
+
+ Operand asInteger = context.AddIntrinsicInt(inst | Intrinsic.Arm64VDouble, toConvert);
+
+ InsertScalar(context, op.Vd, asInteger);
+ }
+ else
+ {
+ if (unsigned)
+ {
+ inst = rm switch
+ {
+ 0b00 => Intrinsic.Arm64FcvtauS,
+ 0b01 => Intrinsic.Arm64FcvtnuS,
+ 0b10 => Intrinsic.Arm64FcvtpuS,
+ 0b11 => Intrinsic.Arm64FcvtmuS,
+ _ => throw new InvalidOperationException($"{nameof(rm)} contains an invalid value: {rm}"),
+ };
+ }
+ else
+ {
+ inst = rm switch
+ {
+ 0b00 => Intrinsic.Arm64FcvtasS,
+ 0b01 => Intrinsic.Arm64FcvtnsS,
+ 0b10 => Intrinsic.Arm64FcvtpsS,
+ 0b11 => Intrinsic.Arm64FcvtmsS,
+ _ => throw new InvalidOperationException($"{nameof(rm)} contains an invalid value: {rm}"),
+ };
+ }
+
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+ }
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
+ }
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+
+ switch (rm)
+ {
+ case 0b00: // Away
+ toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
+ break;
+ case 0b01: // Nearest
+ toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
+ break;
+ case 0b10: // Towards positive infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
+ break;
+ case 0b11: // Towards negative infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
+ break;
+ }
+
+ Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
+
+ InsertScalar(context, op.Vd, asInteger);
+ }
+ }
+
+ public static void Vcvt_TB(ArmEmitterContext context)
+ {
+ OpCode32SimdCvtTB op = (OpCode32SimdCvtTB)context.CurrOp;
+
+ if (Optimizations.UseF16c)
+ {
+ Debug.Assert(!Optimizations.ForceLegacySse);
+
+ if (op.Op)
+ {
+ Operand res = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);
+ if (op.Size == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), res);
+ }
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ res = context.VectorExtract16(res, 0);
+ InsertScalar16(context, op.Vd, op.T, res);
+ }
+ else
+ {
+ Operand res = context.VectorCreateScalar(ExtractScalar16(context, op.Vm, op.T));
+ res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
+ if (op.Size == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
+ }
+ res = context.VectorExtract(op.Size == 1 ? OperandType.I64 : OperandType.I32, res, 0);
+ InsertScalar(context, op.Vd, res);
+ }
+ }
+ else
+ {
+ if (op.Op)
+ {
+ // Convert to half.
+
+ Operand src = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);
+
+ MethodInfo method = op.Size == 1
+ ? typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert))
+ : typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert));
+
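+ // The soft-float helper reads FPCR and updates FPSR through the thread
+ // context, so the JIT's FP state is flushed before the call and reloaded
+ // afterwards (here and in the convert-from-half path below).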
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(method, src);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ InsertScalar16(context, op.Vd, op.T, res);
+ }
+ else
+ {
+ // Convert from half.
+
+ Operand src = ExtractScalar16(context, op.Vm, op.T);
+
+ MethodInfo method = op.Size == 1
+ ? typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert))
+ : typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert));
+
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(method, src);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ InsertScalar(context, op.Vd, res);
+ }
+ }
+ }
+
+ // VRINTA/M/N/P (floating-point).
+ public static void Vrint_RM(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
+
+ int rm = op.Opc2 & 3;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ Intrinsic inst = rm switch
+ {
+ 0b00 => Intrinsic.Arm64FrintaS,
+ 0b01 => Intrinsic.Arm64FrintnS,
+ 0b10 => Intrinsic.Arm64FrintpS,
+ 0b11 => Intrinsic.Arm64FrintmS,
+ _ => throw new InvalidOperationException($"{nameof(rm)} contains an invalid value: {rm}"),
+ };
+
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ FPRoundingMode roundMode = RMToRoundMode(rm);
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
+ return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: true);
+ }
+ });
+ }
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+
+ switch (rm)
+ {
+ case 0b00: // Away
+ toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
+ break;
+ case 0b01: // Nearest
+ toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
+ break;
+ case 0b10: // Towards positive infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
+ break;
+ case 0b11: // Towards negative infinity
+ toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
+ break;
+ }
+
+ InsertScalar(context, op.Vd, toConvert);
+ }
+ }
+
+ // VRINTA (vector).
+ public static void Vrinta_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
+ }
+ }
+
+ // VRINTM (vector).
+ public static void Vrintm_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity)));
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m));
+ }
+ }
+
+ // VRINTN (vector).
+ public static void Vrintn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m));
+ }
+ }
+
+ // VRINTP (vector).
+ public static void Vrintp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity)));
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m));
+ }
+ }
+
+ // VRINTR (floating-point).
+ public static void Vrintr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintiS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ // VRINTZ (floating-point).
+ public static void Vrint_Z(ArmEmitterContext context)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
+ return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero)));
+ });
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Truncate), op1));
+ }
+ }
+
+ // VRINTX (floating-point).
+ public static void Vrintx_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintxS);
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) =>
+ {
+ return EmitRoundByRMode(context, op1);
+ });
+ }
+ }
+
+ private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
+ {
+ Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
+
+ if (signed)
+ {
+ return context.ConvertToFP(type, value);
+ }
+ else
+ {
+ return context.ConvertToFPUI(type, value);
+ }
+ }
+
+ private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
+ {
+ // A port of the similar round function in InstEmitSimdCvt.
+ OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vm >> shift);
+ n = EmitSwapScalar(context, n, op.Vm, doubleSize);
+
+ if (!doubleSize)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp;
+ Operand nIntOrLong2 = default;
+
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
+
+ Operand dRes;
+ if (signed)
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
+ }
+ else
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
+ dRes = context.Add(dRes, nIntOrLong);
+ }
+
+ InsertScalar(context, op.Vd, dRes);
+ }
+ else
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ if (roundMode != FPRoundingMode.ToNearestAway)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+ }
+ else
+ {
+ nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
+ }
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp;
+ Operand nIntOrLong2 = default;
+
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
+ nLong = context.ConvertI64ToI32(nLong);
+
+ Operand dRes;
+ if (signed)
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
+ }
+ else
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
+ dRes = context.Add(dRes, nIntOrLong);
+ }
+
+ InsertScalar(context, op.Vd, dRes);
+ }
+ }
+
+ private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ EmitVectorUnaryOpSimd32(context, (n) =>
+ {
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+
+ Operand zero = context.VectorZero();
+ Operand nCmp;
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+ Operand nInt2 = default;
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ if (signed)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
+ }
+ else
+ {
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
+ return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+
+ Operand zero = context.VectorZero();
+ Operand nCmp;
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
+ Operand nLong2 = default;
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ if (signed)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
+ }
+ else
+ {
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
+ return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
+ }
+ }
+ });
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHash.cs b/src/ARMeilleure/Instructions/InstEmitSimdHash.cs
new file mode 100644
index 0000000..aee12d7
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHash.cs
@@ -0,0 +1,147 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ #region "Sha1"
+ public static void Sha1c_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashChoose)), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1h_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.FixedRotate)), ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorCreateScalar(res));
+ }
+
+ public static void Sha1m_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashMajority)), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1p_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashParity)), d, ne, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1su0_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart1)), d, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha1su1_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart2)), d, n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ #endregion
+
+ #region "Sha256"
+ public static void Sha256h_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha256h2_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha256su0_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, n);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Sha256su1_V(ArmEmitterContext context)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ #endregion
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs b/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs
new file mode 100644
index 0000000..c2bb951
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs
@@ -0,0 +1,64 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ #region "Sha256"
+ public static void Sha256h_V(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Sha256h2_V(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Sha256su0_V(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, m);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Sha256su1_V(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ #endregion
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs b/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs
new file mode 100644
index 0000000..a672b15
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs
@@ -0,0 +1,56 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitSimdHashHelper
+ {
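+ // Note on the SHA-NI mapping below: x86 SHA256RNDS2 performs two SHA-256 rounds on
+ // state packed as ABEF/CDGH, so EmitSha256h repacks the ARM state halves with SHUFPS,
+ // runs two RNDS2 ops for the four rounds, and shuffles out the half that SHA256H
+ // (part2 == false) or SHA256H2 (part2 == true) writes back.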
+ public static Operand EmitSha256h(ArmEmitterContext context, Operand x, Operand y, Operand w, bool part2)
+ {
+ if (Optimizations.UseSha)
+ {
+ Operand src1 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0xbb));
+ Operand src2 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0x11));
+ Operand w2 = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, w, w);
+
+ Operand round2 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src1, src2, w);
+ Operand round4 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src2, round2, w2);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, round4, round2, Const(part2 ? 0x11 : 0xbb));
+
+ return res;
+ }
+
+ string method = part2 ? nameof(SoftFallback.HashUpper) : nameof(SoftFallback.HashLower);
+ return context.Call(typeof(SoftFallback).GetMethod(method), x, y, w);
+ }
+
+ public static Operand EmitSha256su0(ArmEmitterContext context, Operand x, Operand y)
+ {
+ if (Optimizations.UseSha)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Sha256Msg1, x, y);
+ }
+
+ return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), x, y);
+ }
+
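+ // SHA256MSG2 only covers the sigma1 half of the schedule update; the PALIGNR/PADDD
+ // pair below folds in the remaining w[t-7] terms first, which is why this path
+ // requires SSSE3 on top of the SHA extensions.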
+ public static Operand EmitSha256su1(ArmEmitterContext context, Operand x, Operand y, Operand z)
+ {
+ if (Optimizations.UseSha && Optimizations.UseSsse3)
+ {
+ Operand extr = context.AddIntrinsic(Intrinsic.X86Palignr, z, y, Const(4));
+ Operand tmp = context.AddIntrinsic(Intrinsic.X86Paddd, extr, x);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Sha256Msg2, tmp, z);
+
+ return res;
+ }
+
+ return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), x, y, z);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs
new file mode 100644
index 0000000..abd0d9a
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -0,0 +1,2108 @@
+using ARMeilleure.CodeGen.X86;
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+ using Func2I = Func<Operand, Operand, Operand>;
+ using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+ static class InstEmitSimdHelper
+ {
+ #region "Masks"
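+ // PSHUFB selection masks: each byte holds the index of the source byte to gather
+ // (the even or odd elements for sizes B/H/S). A byte with bit 7 set (0x80, see
+ // ZeroMask) zeroes the destination byte instead.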
+ public static readonly long[] EvenMasks = new long[]
+ {
+ 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // B
+ 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // H
+ 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0, // S
+ };
+
+ public static readonly long[] OddMasks = new long[]
+ {
+ 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, // B
+ 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, // H
+ 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0, // S
+ };
+
+ public const long ZeroMask = 128L << 56 | 128L << 48 | 128L << 40 | 128L << 32 | 128L << 24 | 128L << 16 | 128L << 8 | 128L << 0;
+
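+ // Builds the 8x8 bit-matrix immediate for GF2P8AFFINEQB: shifting the rows of the
+ // identity matrix yields a per-byte logical shift, left for positive and right for
+ // negative shift amounts (a standard GFNI trick).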
+ public static ulong X86GetGf2p8LogicalShiftLeft(int shift)
+ {
+ ulong identity = (0b00000001UL << 56) | (0b00000010UL << 48) | (0b00000100UL << 40) | (0b00001000UL << 32) |
+ (0b00010000UL << 24) | (0b00100000UL << 16) | (0b01000000UL << 8) | (0b10000000UL << 0);
+
+ return shift >= 0 ? identity >> (shift * 8) : identity << (-shift * 8);
+ }
+ #endregion
+
+ #region "X86 SSE Intrinsics"
+ public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Paddb,
+ Intrinsic.X86Paddw,
+ Intrinsic.X86Paddd,
+ Intrinsic.X86Paddq,
+ };
+
+ public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pcmpeqb,
+ Intrinsic.X86Pcmpeqw,
+ Intrinsic.X86Pcmpeqd,
+ Intrinsic.X86Pcmpeqq,
+ };
+
+ public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pcmpgtb,
+ Intrinsic.X86Pcmpgtw,
+ Intrinsic.X86Pcmpgtd,
+ Intrinsic.X86Pcmpgtq,
+ };
+
+ public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmaxsb,
+ Intrinsic.X86Pmaxsw,
+ Intrinsic.X86Pmaxsd,
+ };
+
+ public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmaxub,
+ Intrinsic.X86Pmaxuw,
+ Intrinsic.X86Pmaxud,
+ };
+
+ public static readonly Intrinsic[] X86PminsInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pminsb,
+ Intrinsic.X86Pminsw,
+ Intrinsic.X86Pminsd,
+ };
+
+ public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pminub,
+ Intrinsic.X86Pminuw,
+ Intrinsic.X86Pminud,
+ };
+
+ public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmovsxbw,
+ Intrinsic.X86Pmovsxwd,
+ Intrinsic.X86Pmovsxdq,
+ };
+
+ public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Pmovzxbw,
+ Intrinsic.X86Pmovzxwd,
+ Intrinsic.X86Pmovzxdq,
+ };
+
+ public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psllw,
+ Intrinsic.X86Pslld,
+ Intrinsic.X86Psllq,
+ };
+
+ public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psraw,
+ Intrinsic.X86Psrad,
+ };
+
+ public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[]
+ {
+ 0,
+ Intrinsic.X86Psrlw,
+ Intrinsic.X86Psrld,
+ Intrinsic.X86Psrlq,
+ };
+
+ public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Psubb,
+ Intrinsic.X86Psubw,
+ Intrinsic.X86Psubd,
+ Intrinsic.X86Psubq,
+ };
+
+ public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Punpckhbw,
+ Intrinsic.X86Punpckhwd,
+ Intrinsic.X86Punpckhdq,
+ Intrinsic.X86Punpckhqdq,
+ };
+
+ public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[]
+ {
+ Intrinsic.X86Punpcklbw,
+ Intrinsic.X86Punpcklwd,
+ Intrinsic.X86Punpckldq,
+ Intrinsic.X86Punpcklqdq,
+ };
+ #endregion
+
+ public static void EnterArmFpMode(EmitterContext context, Func<FPState, Operand> getFpFlag)
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
+
+ Operand fzTrue = getFpFlag(FPState.FzFlag);
+ Operand r0True = getFpFlag(FPState.RMode0Flag);
+ Operand r1True = getFpFlag(FPState.RMode1Flag);
+
+ mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
+
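+ // FTZ/DAZ are set together with the underflow and denormal exception masks (Um/Dm)
+ // so that flushed operands and results cannot raise x86 exceptions.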
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(fzTrue, Const((int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm)), Const(0)));
+
+ // X86 round modes in order: nearest, negative, positive, zero
+ // ARM round modes in order: nearest, positive, negative, zero
+ // Read the bits backwards to correct this.
+
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r0True, Const((int)Mxcsr.Rhi), Const(0)));
+ mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r1True, Const((int)Mxcsr.Rlo), Const(0)));
+
+ context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
+ }
+ else if (Optimizations.UseAdvSimd)
+ {
+ Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
+
+ Operand fzTrue = getFpFlag(FPState.FzFlag);
+ Operand r0True = getFpFlag(FPState.RMode0Flag);
+ Operand r1True = getFpFlag(FPState.RMode1Flag);
+
+ fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
+
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(fzTrue, Const((int)FPCR.Fz), Const(0)));
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r0True, Const((int)FPCR.RMode0), Const(0)));
+ fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r1True, Const((int)FPCR.RMode1), Const(0)));
+
+ context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
+
+ // TODO: Restore FPSR
+ }
+ }
+
+ public static void ExitArmFpMode(EmitterContext context, Action<FPState, Operand> setFpFlag)
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
+
+ // Unset round mode (to nearest) and ftz.
+ mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
+
+ context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
+
+ // Status flags would be stored here if they were used.
+ }
+ else if (Optimizations.UseAdvSimd)
+ {
+ Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
+
+ // Unset round mode (to nearest) and fz.
+ fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
+
+ context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
+
+ // TODO: Store FPSR
+ }
+ }
+
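+ // A64 shift immediates are encoded relative to the element size: left shifts store
+ // esize + shift and right shifts store 2 * esize - shift, hence the two decodings below.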
+ public static int GetImmShl(OpCodeSimdShImm op)
+ {
+ return op.Imm - (8 << op.Size);
+ }
+
+ public static int GetImmShr(OpCodeSimdShImm op)
+ {
+ return (8 << (op.Size + 1)) - op.Imm;
+ }
+
+ public static Operand X86GetScalar(ArmEmitterContext context, float value)
+ {
+ return X86GetScalar(context, BitConverter.SingleToInt32Bits(value));
+ }
+
+ public static Operand X86GetScalar(ArmEmitterContext context, double value)
+ {
+ return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));
+ }
+
+ public static Operand X86GetScalar(ArmEmitterContext context, int value)
+ {
+ return context.VectorCreateScalar(Const(value));
+ }
+
+ public static Operand X86GetScalar(ArmEmitterContext context, long value)
+ {
+ return context.VectorCreateScalar(Const(value));
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, float value)
+ {
+ return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value));
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, double value)
+ {
+ return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value));
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, short value)
+ {
+ ulong value1 = (ushort)value;
+ ulong value2 = value1 << 16 | value1;
+ ulong value4 = value2 << 32 | value2;
+
+ return X86GetAllElements(context, (long)value4);
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, int value)
+ {
+ Operand vector = context.VectorCreateScalar(Const(value));
+
+ vector = context.AddIntrinsic(Intrinsic.X86Shufps, vector, vector, Const(0));
+
+ return vector;
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, long value)
+ {
+ Operand vector = context.VectorCreateScalar(Const(value));
+
+ vector = context.AddIntrinsic(Intrinsic.X86Movlhps, vector, vector);
+
+ return vector;
+ }
+
+ public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0)
+ {
+ return X86GetElements(context, (ulong)e1, (ulong)e0);
+ }
+
+ public static Operand X86GetElements(ArmEmitterContext context, ulong e1, ulong e0)
+ {
+ Operand vector0 = context.VectorCreateScalar(Const(e0));
+ Operand vector1 = context.VectorCreateScalar(Const(e1));
+
+ return context.AddIntrinsic(Intrinsic.X86Punpcklqdq, vector0, vector1);
+ }
+
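+ // ROUNDS*/ROUNDP* immediate: bits 1:0 select the rounding mode, bit 3 suppresses
+ // the precision (inexact) exception.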
+ public static int X86GetRoundControl(FPRoundingMode roundMode)
+ {
+ return roundMode switch
+ {
+#pragma warning disable IDE0055 // Disable formatting
+ FPRoundingMode.ToNearest => 8 | 0, // even
+ FPRoundingMode.TowardsPlusInfinity => 8 | 2,
+ FPRoundingMode.TowardsMinusInfinity => 8 | 1,
+ FPRoundingMode.TowardsZero => 8 | 3,
+ _ => throw new ArgumentException($"Invalid rounding mode \"{roundMode}\"."),
+#pragma warning restore IDE0055
+ };
+ }
+
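+ // SSE4.1 ROUND* has no round-half-away mode, so ties-to-away rounding is emulated
+ // by adding 0.5 minus one ulp (with the operand's sign attached) and then rounding
+ // towards zero.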
+ public static Operand EmitSse41RoundToNearestWithTiesToAwayOpF(ArmEmitterContext context, Operand n, bool scalar)
+ {
+ Debug.Assert(n.Type == OperandType.V128);
+
+ Operand nCopy = context.Copy(n);
+
+ Operand rC = Const(X86GetRoundControl(FPRoundingMode.TowardsZero));
+
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
+ Operand signMask = scalar ? X86GetScalar(context, int.MinValue) : X86GetAllElements(context, int.MinValue);
+ signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
+
+ // 0x3EFFFFFF == BitConverter.SingleToInt32Bits(0.5f) - 1
+ Operand valueMask = scalar ? X86GetScalar(context, 0x3EFFFFFF) : X86GetAllElements(context, 0x3EFFFFFF);
+ valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addss : Intrinsic.X86Addps, nCopy, valueMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundss : Intrinsic.X86Roundps, nCopy, rC);
+ }
+ else
+ {
+ Operand signMask = scalar ? X86GetScalar(context, long.MinValue) : X86GetAllElements(context, long.MinValue);
+ signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
+
+ // 0x3FDFFFFFFFFFFFFFL == BitConverter.DoubleToInt64Bits(0.5d) - 1L
+ Operand valueMask = scalar ? X86GetScalar(context, 0x3FDFFFFFFFFFFFFFL) : X86GetAllElements(context, 0x3FDFFFFFFFFFFFFFL);
+ valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addsd : Intrinsic.X86Addpd, nCopy, valueMask);
+
+ nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundsd : Intrinsic.X86Roundpd, nCopy, rC);
+ }
+
+ return nCopy;
+ }
+
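+ // Classic SWAR population count: fold bits into 2-bit sums, then nibbles, then a
+ // byte; the narrow masks suffice because CNT operates on 8-bit elements.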
+ public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op) // "size" is 8 (SIMD&FP Inst.).
+ {
+ Debug.Assert(op.Type == OperandType.I32 || op.Type == OperandType.I64);
+
+ Operand op0 = context.Subtract(op, context.BitwiseAnd(context.ShiftRightUI(op, Const(1)), Const(op.Type, 0x55L)));
+
+ Operand c1 = Const(op.Type, 0x33L);
+ Operand op1 = context.Add(context.BitwiseAnd(context.ShiftRightUI(op0, Const(2)), c1), context.BitwiseAnd(op0, c1));
+
+ return context.BitwiseAnd(context.Add(op1, context.ShiftRightUI(op1, Const(4))), Const(op.Type, 0x0fL));
+ }
+
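+ // Scalar FP helpers: AArch64 scalar writes zero the unused vector lanes, so clear
+ // bits [127:64] for doubles and [127:32] for floats after the operation.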
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n);
+
+ if ((op.Size & 1) != 0)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ else
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n, m);
+
+ if ((op.Size & 1) != 0)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+ else
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ Operand res = context.AddIntrinsic(inst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(MathF).GetMethod(name, new Type[] { typeof(float) })
+ : typeof(Math).GetMethod(name, new Type[] { typeof(double) });
+
+ return context.Call(info, n);
+ }
+
+ public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ string name = nameof(Math.Round);
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
+ : typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });
+
+ return context.Call(info, n, Const((int)roundMode));
+ }
+
+ public static Operand EmitGetRoundingMode(ArmEmitterContext context)
+ {
+ Operand rMode = context.ShiftLeft(GetFpFlag(FPState.RMode1Flag), Const(1));
+ rMode = context.BitwiseOr(rMode, GetFpFlag(FPState.RMode0Flag));
+
+ return rMode;
+ }
+
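+ // Rounds using the dynamic FPCR rounding mode: ToNearest maps to Round (to even),
+ // +infinity to Ceiling, -infinity to Floor, and TowardsZero to Truncate.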
+ public static Operand EmitRoundByRMode(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.FP32 || op.Type == OperandType.FP64);
+
+ Operand lbl1 = Label();
+ Operand lbl2 = Label();
+ Operand lbl3 = Label();
+ Operand lblEnd = Label();
+
+ Operand rN = Const((int)FPRoundingMode.ToNearest);
+ Operand rP = Const((int)FPRoundingMode.TowardsPlusInfinity);
+ Operand rM = Const((int)FPRoundingMode.TowardsMinusInfinity);
+
+ Operand res = context.AllocateLocal(op.Type);
+
+ Operand rMode = EmitGetRoundingMode(context);
+
+ context.BranchIf(lbl1, rMode, rN, Comparison.NotEqual);
+ context.Copy(res, EmitRoundMathCall(context, MidpointRounding.ToEven, op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lbl2, rMode, rP, Comparison.NotEqual);
+ context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Ceiling), op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl2);
+ context.BranchIf(lbl3, rMode, rM, Comparison.NotEqual);
+ context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Floor), op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl3);
+ context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Truncate), op));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
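+ // Managed soft-float fallbacks run under host FP settings and can observe guest
+ // state, so swap FP modes and spill/reload the guest context around the call.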
+ public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(SoftFloat32).GetMethod(name)
+ : typeof(SoftFloat64).GetMethod(name);
+
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(info, callArgs);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ return res;
+ }
+
+ public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
+ }
+
+ public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand d = context.VectorExtract(type, GetVec(op.Rd), 0);
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0));
+ }
+
+ public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
+ Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
+ Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+ Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
+
+ d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0));
+ }
+
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
+ }
+
+ public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand a = context.VectorExtract(type, GetVec(op.Ra), 0);
+ Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0));
+ }
+
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> sizeF + 2;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+
+ res = context.VectorInsert(res, emit(ne), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> sizeF + 2;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), index);
+
+ res = context.VectorInsert(res, emit(ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> sizeF + 2;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), index);
+
+ res = context.VectorInsert(res, emit(de, ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> sizeF + 2;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ res = context.VectorInsert(res, emit(ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> sizeF + 2;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);
+
+ res = context.VectorInsert(res, emit(de, ne, me), index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size);
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ Operand imm = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, emit(imm), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ Operand imm = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+
+ res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRmBinaryOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRmBinaryOp(context, emit, signed: false);
+ }
+
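+ // Widening forms produce size + 1 results from size sources; 'part' selects the
+ // upper source half for the 128-bit "2" variants (e.g. SADDW2/UADDW2).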
+ private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed);
+ Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+ Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+ Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenBinaryOpByElem(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorWidenBinaryOpByElem(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenTernaryOpByElem(context, emit, signed: true);
+ }
+
+ public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorWidenTernaryOpByElem(context, emit, signed: false);
+ }
+
+ private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorPairwiseOp(context, emit, signed: true);
+ }
+
+ public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorPairwiseOp(context, emit, signed: false);
+ }
+
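+ // Pairwise: the low half of the result receives op(n[2i], n[2i + 1]) and the high
+ // half op(m[2i], m[2i + 1]).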
+ private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
+ Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);
+
+ Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed);
+ Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size);
+ res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
+ Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks[op.Size]);
+
+ Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n
+ Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
+ }
+ else if (op.Size < 3)
+ {
+ Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);
+
+ Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n
+ Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
+ }
+ else
+ {
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[3], left, right));
+ }
+ }
+
+ public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
+ }
+
+ public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
+ }
+
+ public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
+ }
+
+ public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
+ }
+
+ private static void EmitVectorAcrossVectorOp(
+ ArmEmitterContext context,
+ Func2I emit,
+ bool signed,
+ bool isLong)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed);
+
+ for (int index = 1; index < elems; index++)
+ {
+ Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+ res = emit(res, n);
+ }
+
+ int size = isLong ? op.Size + 1 : op.Size;
+
+ Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
+ public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);
+
+ Operand res = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+ for (int index = 1; index < 4; index++)
+ {
+ Operand n = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), index);
+
+ res = emit(res, n);
+ }
+
+ Operand d = context.VectorInsert(context.VectorZero(), res, 0);
+
+ context.Copy(GetVec(op.Rd), d);
+ }
+
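+ // Broadcasts each lane across the vector with SHUFPS, then reduces the four parts
+ // as a tree; only element 0 of the result is kept.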
+ public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);
+
+ const int SM0 = 0 << 6 | 0 << 4 | 0 << 2 | 0 << 0;
+ const int SM1 = 1 << 6 | 1 << 4 | 1 << 2 | 1 << 0;
+ const int SM2 = 2 << 6 | 2 << 4 | 2 << 2 | 2 << 0;
+ const int SM3 = 3 << 6 | 3 << 4 | 3 << 2 | 3 << 0;
+
+ Operand nCopy = context.Copy(GetVec(op.Rn));
+
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(SM0));
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(SM1));
+ Operand part2 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(SM2));
+ Operand part3 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(SM3));
+
+ Operand res = emit(emit(part0, part1), emit(part2, part3));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+
+ public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1);
+
+ Operand res = context.VectorInsert(context.VectorZero(), emit(ne0, ne1), 0);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand op0, op1;
+
+ if ((op.Size & 1) == 0)
+ {
+ const int SM0 = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0;
+ const int SM1 = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0;
+
+ Operand zeroN = context.VectorZeroUpper64(n);
+
+ op0 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(SM0));
+ op1 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(SM1));
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ Operand zero = context.VectorZero();
+
+ op0 = context.AddIntrinsic(Intrinsic.X86Movlhps, n, zero);
+ op1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, n);
+ }
+
+ context.Copy(GetVec(op.Rd), emit(op0, op1));
+ }
+
+ public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int pairs = op.GetPairsCount() >> sizeF + 2;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex);
+ Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1);
+
+ Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex);
+ Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1);
+
+ res = context.VectorInsert(res, emit(n0, n1), index);
+ res = context.VectorInsert(res, emit(m0, m1), pairs + index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand nCopy = context.Copy(GetVec(op.Rn));
+ Operand mCopy = context.Copy(GetVec(op.Rm));
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, nCopy, mCopy);
+
+ Operand zero = context.VectorZero();
+
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);
+
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
+ }
+ else /* if (op.RegisterSize == RegisterSize.Simd128) */
+ {
+ const int SM0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
+ const int SM1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;
+
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(SM0));
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(SM1));
+
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, nCopy, mCopy);
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nCopy, mCopy);
+
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
+ }
+ }
+
+ public enum CmpCondition
+ {
+ // Legacy Sse.
+ Equal = 0, // Ordered, non-signaling.
+ LessThan = 1, // Ordered, signaling.
+ LessThanOrEqual = 2, // Ordered, signaling.
+ UnorderedQ = 3, // Non-signaling.
+ NotLessThan = 5, // Unordered, signaling.
+ NotLessThanOrEqual = 6, // Unordered, signaling.
+ OrderedQ = 7, // Non-signaling.
+
+ // Vex.
+ GreaterThanOrEqual = 13, // Ordered, signaling.
+ GreaterThan = 14, // Ordered, signaling.
+ OrderedS = 23, // Signaling.
+ }
+
+ [Flags]
+ public enum SaturatingFlags
+ {
+ None = 0,
+
+ ByElem = 1 << 0,
+ Scalar = 1 << 1,
+ Signed = 1 << 2,
+
+ Add = 1 << 3,
+ Sub = 1 << 4,
+
+ Accumulate = 1 << 5,
+ }
+
+ public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed);
+ }
+
+ public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ {
+ EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Signed);
+ }
+
+ public static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ bool scalar = (flags & SaturatingFlags.Scalar) != 0;
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand de;
+
+ if (op.Size <= 2)
+ {
+ de = EmitSignedSrcSatQ(context, emit(ne), op.Size, signedDst: true);
+ }
+ else /* if (op.Size == 3) */
+ {
+ de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne));
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
+ {
+ EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed | flags);
+ }
+
+ public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
+ {
+ EmitSaturatingBinaryOp(context, null, SaturatingFlags.Scalar | flags);
+ }
+
+ public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
+ {
+ EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Signed | flags);
+ }
+
+ public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
+ {
+ EmitSaturatingBinaryOp(context, null, flags);
+ }
+
+ public static void EmitVectorSaturatingBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+ {
+ EmitSaturatingBinaryOp(context, emit, SaturatingFlags.ByElem | SaturatingFlags.Signed);
+ }
+
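+ // Shared emitter for the saturating add/sub/accumulate families: elements of up to
+ // 32 bits are widened to 64 bits and saturated after a plain op, while 64-bit
+ // elements need the dedicated SatQ helpers below.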
+ public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ bool byElem = (flags & SaturatingFlags.ByElem) != 0;
+ bool scalar = (flags & SaturatingFlags.Scalar) != 0;
+ bool signed = (flags & SaturatingFlags.Signed) != 0;
+
+ bool add = (flags & SaturatingFlags.Add) != 0;
+ bool sub = (flags & SaturatingFlags.Sub) != 0;
+
+ bool accumulate = (flags & SaturatingFlags.Accumulate) != 0;
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ if (add || sub)
+ {
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de;
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+ Operand me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed);
+
+ if (op.Size <= 2)
+ {
+ Operand temp = add ? context.Add(ne, me) : context.Subtract(ne, me);
+
+ de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed);
+ }
+ else /* if (op.Size == 3) */
+ {
+ if (add)
+ {
+ de = signed ? EmitBinarySignedSatQAdd(context, ne, me) : EmitBinaryUnsignedSatQAdd(context, ne, me);
+ }
+ else /* if (sub) */
+ {
+ de = signed ? EmitBinarySignedSatQSub(context, ne, me) : EmitBinaryUnsignedSatQSub(context, ne, me);
+ }
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+ else if (accumulate)
+ {
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de;
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed);
+ Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed);
+
+ if (op.Size <= 2)
+ {
+ Operand temp = context.Add(ne, me);
+
+ de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed);
+ }
+ else /* if (op.Size == 3) */
+ {
+ de = signed ? EmitBinarySignedSatQAcc(context, ne, me) : EmitBinaryUnsignedSatQAcc(context, ne, me);
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+ else
+ {
+ Operand me = default;
+
+ if (byElem)
+ {
+ OpCodeSimdRegElem opRegElem = (OpCodeSimdRegElem)op;
+
+ me = EmitVectorExtract(context, opRegElem.Rm, opRegElem.Index, op.Size, signed);
+ }
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+ if (!byElem)
+ {
+ me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed);
+ }
+
+ Operand de = EmitSignedSrcSatQ(context, emit(ne, me), op.Size, signedDst: signed);
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ [Flags]
+ public enum SaturatingNarrowFlags
+ {
+ Scalar = 1 << 0,
+ SignedSrc = 1 << 1,
+ SignedDst = 1 << 2,
+
+ ScalarSxSx = Scalar | SignedSrc | SignedDst,
+ ScalarSxZx = Scalar | SignedSrc,
+ ScalarZxZx = Scalar,
+
+ VectorSxSx = SignedSrc | SignedDst,
+ VectorSxZx = SignedSrc,
+ VectorZxZx = 0,
+ }
+
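+ // Saturating narrow: each size + 1 source element is saturated down to size; for
+ // the "2" variants (part != 0) the low half of Rd is preserved and the upper
+ // elements are filled in.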
+ public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0;
+ bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0;
+ bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0;
+
+ int elems = !scalar ? 8 >> op.Size : 1;
+
+ int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);
+
+ Operand temp = signedSrc
+ ? EmitSignedSrcSatQ(context, ne, op.Size, signedDst)
+ : EmitUnsignedSrcSatQ(context, ne, op.Size, signedDst);
+
+ res = EmitVectorInsert(context, res, temp, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+
+ // long SignedSignSatQ(long op, int size);
+ public static Operand EmitSignedSignSatQ(ArmEmitterContext context, Operand op, int size)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand zeroL = Const(0L);
+ Operand maxT = Const((1L << (eSize - 1)) - 1L);
+ Operand minT = Const(-(1L << (eSize - 1)));
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL);
+
+ context.BranchIf(lbl1, op, zeroL, Comparison.LessOrEqual);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, op, zeroL, Comparison.GreaterOrEqual);
+ context.Copy(res, minT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong UnsignedSignSatQ(ulong op, int size);
+ public static Operand EmitUnsignedSignSatQ(ArmEmitterContext context, Operand op, int size)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lblEnd = Label();
+
+ Operand zeroUL = Const(0UL);
+ Operand maxT = Const(ulong.MaxValue >> (64 - eSize));
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL);
+
+ context.BranchIf(lblEnd, op, zeroUL, Comparison.LessOrEqualUI);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // TSrc (16bit, 32bit, 64bit; signed) -> TDst (8bit, 16bit, 32bit; signed, unsigned).
+ // long SignedSrcSignedDstSatQ(long op, int size); ulong SignedSrcUnsignedDstSatQ(long op, int size);
+ public static Operand EmitSignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
+ {
+ int eSizeDst = 8 << sizeDst;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL);
+ Operand minT = signedDst ? Const(-(1L << (eSizeDst - 1))) : Const(0UL);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lbl1, op, maxT, Comparison.LessOrEqual);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, op, minT, Comparison.GreaterOrEqual);
+ context.Copy(res, minT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // TSrc (16bit, 32bit, 64bit; unsigned) -> TDst (8bit, 16bit, 32bit; signed, unsigned).
+ // long UnsignedSrcSignedDstSatQ(ulong op, int size); ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size);
+ public static Operand EmitUnsignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
+ {
+ int eSizeDst = 8 << sizeDst;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);
+
+ Operand lblEnd = Label();
+
+ Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lblEnd, op, maxT, Comparison.LessOrEqualUI);
+ context.Copy(res, maxT);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long UnarySignedSatQAbsOrNeg(long op);
+ private static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand minL = Const(long.MinValue);
+ Operand maxL = Const(long.MaxValue);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ context.BranchIf(lblEnd, op, minL, Comparison.NotEqual);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long BinarySignedSatQAdd(long op1, long op2);
+ public static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand minL = Const(long.MinValue);
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
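+ // Signed overflow occurred iff op1 and op2 share a sign but the sum's sign differs,
+ // i.e. ~(op1 ^ op2) & (op1 ^ sum) is negative.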
+ Operand left = context.BitwiseNot(context.BitwiseExclusiveOr(op1, op2));
+ Operand right = context.BitwiseExclusiveOr(op1, add);
+ context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual);
+
+ Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL);
+ context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL));
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2);
+ public static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand maxUL = Const(ulong.MaxValue);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
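+ // Unsigned overflow occurred iff the sum wrapped around below op1.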
+ context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI);
+ context.Copy(res, maxUL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long BinarySignedSatQSub(long op1, long op2);
+ public static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand minL = Const(long.MinValue);
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand sub = context.Subtract(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub);
+
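+ // Signed overflow occurred iff op1 and op2 have different signs and the difference's
+ // sign differs from op1, i.e. (op1 ^ op2) & (op1 ^ diff) is negative.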
+ Operand left = context.BitwiseExclusiveOr(op1, op2);
+ Operand right = context.BitwiseExclusiveOr(op1, sub);
+ context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual);
+
+ Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL);
+ context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL));
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong BinaryUnsignedSatQSub(ulong op1, ulong op2);
+ public static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lblEnd = Label();
+
+ Operand zeroL = Const(0L);
+
+ Operand sub = context.Subtract(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub);
+
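+ // Unsigned underflow occurred iff op1 < op2; saturate to zero.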
+ context.BranchIf(lblEnd, op1, op2, Comparison.GreaterOrEqualUI);
+ context.Copy(res, zeroL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long BinarySignedSatQAcc(ulong op1, long op2);
+ private static Operand EmitBinarySignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lbl1 = Label();
+ Operand lbl2 = Label();
+ Operand lblEnd = Label();
+
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
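+ // op1 is unsigned, op2 is signed; since op1 >= 0, only upward saturation is possible.
+ // The cases split on whether op1 fits in a non-negative long (op1 <=u long.MaxValue).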
+ context.BranchIf(lbl1, op1, maxL, Comparison.GreaterUI);
+ Operand notOp2AndRes = context.BitwiseAnd(context.BitwiseNot(op2), add);
+ context.BranchIf(lblEnd, notOp2AndRes, zeroL, Comparison.GreaterOrEqual);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lbl2, op2, zeroL, Comparison.Less);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl2);
+ context.BranchIf(lblEnd, add, maxL, Comparison.LessOrEqualUI);
+ context.Copy(res, maxL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // ulong BinaryUnsignedSatQAcc(long op1, ulong op2);
+ private static Operand EmitBinaryUnsignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand maxUL = Const(ulong.MaxValue);
+ Operand maxL = Const(long.MaxValue);
+ Operand zeroL = Const(0L);
+
+ Operand add = context.Add(op1, op2);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
+
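+ // op1 is signed, op2 is unsigned; saturate to ulong.MaxValue if the sum wraps upward
+ // (op1 >= 0), or to zero if a negative op1 drags the sum below zero.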
+ context.BranchIf(lbl1, op1, zeroL, Comparison.Less);
+ context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI);
+ context.Copy(res, maxUL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, op2, maxL, Comparison.GreaterUI);
+ context.BranchIf(lblEnd, add, zeroL, Comparison.GreaterOrEqual);
+ context.Copy(res, zeroL);
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
+ {
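+ // Andn with a mask holding only the sign bit (-0.0) clears that bit, computing abs(value).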
+ Operand mask;
+ if (single)
+ {
+ mask = vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f);
+ }
+ else
+ {
+ mask = vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d);
+ }
+
+ return context.AddIntrinsic(single ? Intrinsic.X86Andnps : Intrinsic.X86Andnpd, mask, value);
+ }
+
+ public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract(context, reg, index, size, true);
+ }
+
+ public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract(context, reg, index, size, false);
+ }
+
+ public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
+ {
+ ThrowIfInvalid(index, size);
+
+ Operand res = default;
+
+ switch (size)
+ {
+ case 0:
+ res = context.VectorExtract8(GetVec(reg), index);
+ break;
+
+ case 1:
+ res = context.VectorExtract16(GetVec(reg), index);
+ break;
+
+ case 2:
+ res = context.VectorExtract(OperandType.I32, GetVec(reg), index);
+ break;
+
+ case 3:
+ res = context.VectorExtract(OperandType.I64, GetVec(reg), index);
+ break;
+ }
+
+ if (signed)
+ {
+ switch (size)
+ {
+ case 0:
+ res = context.SignExtend8(OperandType.I64, res);
+ break;
+ case 1:
+ res = context.SignExtend16(OperandType.I64, res);
+ break;
+ case 2:
+ res = context.SignExtend32(OperandType.I64, res);
+ break;
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 0:
+ res = context.ZeroExtend8(OperandType.I64, res);
+ break;
+ case 1:
+ res = context.ZeroExtend16(OperandType.I64, res);
+ break;
+ case 2:
+ res = context.ZeroExtend32(OperandType.I64, res);
+ break;
+ }
+ }
+
+ return res;
+ }
+
+ public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
+ {
+ ThrowIfInvalid(index, size);
+
+ if (size < 3 && value.Type == OperandType.I64)
+ {
+ value = context.ConvertI64ToI32(value);
+ }
+
+ switch (size)
+ {
+ case 0:
+ vector = context.VectorInsert8(vector, value, index);
+ break;
+ case 1:
+ vector = context.VectorInsert16(vector, value, index);
+ break;
+ case 2:
+ case 3:
+ vector = context.VectorInsert(vector, value, index);
+ break;
+ }
+
+ return vector;
+ }
+
+ public static void ThrowIfInvalid(int index, int size)
+ {
+ if ((uint)size > 3u)
+ {
+ throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ if ((uint)index >= 16u >> size)
+ {
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
new file mode 100644
index 0000000..2f021a1
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -0,0 +1,1320 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+ using Func2I = Func<Operand, Operand, Operand>;
+ using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+ static class InstEmitSimdHelper32
+ {
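+ // Maps an A32 SIMD register index to its backing quadword register and to the
+ // subindex of the doubleword or word within that quadword.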
+ public static (int, int) GetQuadwordAndSubindex(int index, RegisterSize size)
+ {
+ return size switch
+ {
+ RegisterSize.Simd128 => (index >> 1, 0),
+ RegisterSize.Simd64 or RegisterSize.Int64 => (index >> 1, index & 1),
+ RegisterSize.Int32 => (index >> 2, index & 3),
+ _ => throw new ArgumentException("Unrecognized Vector Register Size."),
+ };
+ }
+
+ public static Operand ExtractScalar(ArmEmitterContext context, OperandType type, int reg)
+ {
+ Debug.Assert(type != OperandType.V128);
+
+ if (type == OperandType.FP64 || type == OperandType.I64)
+ {
+ // From dreg.
+ return context.VectorExtract(type, GetVecA32(reg >> 1), reg & 1);
+ }
+ else
+ {
+ // From sreg.
+ return context.VectorExtract(type, GetVecA32(reg >> 2), reg & 3);
+ }
+ }
+
+ public static void InsertScalar(ArmEmitterContext context, int reg, Operand value)
+ {
+ Debug.Assert(value.Type != OperandType.V128);
+
+ Operand vec, insert;
+ if (value.Type == OperandType.FP64 || value.Type == OperandType.I64)
+ {
+ // From dreg.
+ vec = GetVecA32(reg >> 1);
+ insert = context.VectorInsert(vec, value, reg & 1);
+ }
+ else
+ {
+ // From sreg.
+ vec = GetVecA32(reg >> 2);
+ insert = context.VectorInsert(vec, value, reg & 3);
+ }
+
+ context.Copy(vec, insert);
+ }
+
+ public static Operand ExtractScalar16(ArmEmitterContext context, int reg, bool top)
+ {
+ return context.VectorExtract16(GetVecA32(reg >> 2), ((reg & 3) << 1) | (top ? 1 : 0));
+ }
+
+ public static void InsertScalar16(ArmEmitterContext context, int reg, bool top, Operand value)
+ {
+ Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.I32);
+
+ Operand vec, insert;
+ vec = GetVecA32(reg >> 2);
+ insert = context.VectorInsert16(vec, value, ((reg & 3) << 1) | (top ? 1 : 0));
+
+ context.Copy(vec, insert);
+ }
+
+ public static Operand ExtractElement(ArmEmitterContext context, int reg, int size, bool signed)
+ {
+ return EmitVectorExtract32(context, reg >> (4 - size), reg & ((16 >> size) - 1), size, signed);
+ }
+
+ public static void EmitVectorImmUnaryOp32(ArmEmitterContext context, Func1I emit)
+ {
+ IOpCode32SimdImm op = (IOpCode32SimdImm)context.CurrOp;
+
+ Operand imm = Const(op.Immediate);
+
+ int elems = op.Elems;
+ (int index, int subIndex) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize);
+
+ Operand vec = GetVecA32(index);
+ Operand res = vec;
+
+ for (int item = 0; item < elems; item++)
+ {
+ res = EmitVectorInsert(context, res, emit(imm), item + subIndex * elems, op.Size);
+ }
+
+ context.Copy(vec, res);
+ }
+
+ public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Func1I emit)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(m));
+ }
+
+ public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand n = ExtractScalar(context, type, op.Vn);
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(n, m));
+ }
+
+ public static void EmitScalarBinaryOpI32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.I64 : OperandType.I32;
+
+ if (op.Size < 2)
+ {
+ throw new NotSupportedException("Cannot perform a scalar SIMD operation on integers smaller than 32 bits.");
+ }
+
+ Operand n = ExtractScalar(context, type, op.Vn);
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(n, m));
+ }
+
+ public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Func3I emit)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand a = ExtractScalar(context, type, op.Vd);
+ Operand n = ExtractScalar(context, type, op.Vn);
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ InsertScalar(context, op.Vd, emit(a, n, m));
+ }
+
+ public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Func1I emit)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
+
+ res = context.VectorInsert(res, emit(me), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+ Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
+
+ res = context.VectorInsert(res, emit(ne, me), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Func3I emit)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index);
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+ Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);
+
+ res = context.VectorInsert(res, emit(de, ne, me), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Integer
+
+ public static void EmitVectorUnaryAccumulateOpI32(ArmEmitterContext context, Func1I emit, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, context.Add(de, emit(me)), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorBinaryWideOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size + 1, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorImmBinaryQdQmOpZx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorImmBinaryQdQmOpI32(context, emit, false);
+ }
+
+ public static void EmitVectorImmBinaryQdQmOpSx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorImmBinaryQdQmOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorImmBinaryQdQmOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size + 1, signed);
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit)
+ {
+ EmitVectorUnaryOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit, bool accumulate)
+ {
+ if (accumulate)
+ {
+ EmitVectorUnaryAccumulateOpI32(context, emit, true);
+ }
+ else
+ {
+ EmitVectorUnaryOpI32(context, emit, true);
+ }
+ }
+
+ public static void EmitVectorBinaryOpSx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorBinaryOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorTernaryOpSx32(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorTernaryOpI32(context, emit, true);
+ }
+
+ public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit)
+ {
+ EmitVectorUnaryOpI32(context, emit, false);
+ }
+
+ public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit, bool accumulate)
+ {
+ if (accumulate)
+ {
+ EmitVectorUnaryAccumulateOpI32(context, emit, false);
+ }
+ else
+ {
+ EmitVectorUnaryOpI32(context, emit, false);
+ }
+ }
+
+ public static void EmitVectorBinaryOpZx32(ArmEmitterContext context, Func2I emit)
+ {
+ EmitVectorBinaryOpI32(context, emit, false);
+ }
+
+ public static void EmitVectorTernaryOpZx32(ArmEmitterContext context, Func3I emit)
+ {
+ EmitVectorTernaryOpI32(context, emit, false);
+ }
+
+ // Vector by scalar
+
+ public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+
+ res = context.VectorInsert(res, emit(ne, m), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand m = ExtractElement(context, op.Vm, op.Size, signed);
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, m), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorByScalarLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand m = ExtractElement(context, op.Vm, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ m = signed ? context.SignExtend32(OperandType.I64, m) : context.ZeroExtend32(OperandType.I64, m);
+ }
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ }
+
+ res = EmitVectorInsert(context, res, emit(ne, m), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Func3I emit)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+
+ Operand m = ExtractScalar(context, type, op.Vm);
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index);
+ Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
+
+ res = context.VectorInsert(res, emit(de, ne, m), op.Fd + index);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed);
+
+ Operand res = GetVecA32(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed);
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(de, ne, m), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Pairwise
+
+ public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Func2I emit)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = op.GetBytesCount() >> (sizeF + 2);
+ int pairs = elems >> 1;
+
+ Operand res = GetVecA32(op.Qd);
+ Operand mvec = GetVecA32(op.Qm);
+ Operand nvec = GetVecA32(op.Qn);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand n1 = context.VectorExtract(type, nvec, op.Fn + pairIndex);
+ Operand n2 = context.VectorExtract(type, nvec, op.Fn + pairIndex + 1);
+
+ res = context.VectorInsert(res, emit(n1, n2), op.Fd + index);
+
+ Operand m1 = context.VectorExtract(type, mvec, op.Fm + pairIndex);
+ Operand m2 = context.VectorExtract(type, mvec, op.Fm + pairIndex + 1);
+
+ res = context.VectorInsert(res, emit(m1, m2), op.Fd + index + pairs);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorPairwiseOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand n1 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex, op.Size, signed);
+ Operand n2 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex + 1, op.Size, signed);
+
+ Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed);
+ Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(n1, n2), op.Id + index, op.Size);
+ res = EmitVectorInsert(context, res, emit(m1, m2), op.Id + index + pairs, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorPairwiseLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int elems = (op.Q ? 16 : 8) >> op.Size;
+ int pairs = elems >> 1;
+ int id = (op.Vd & 1) * pairs;
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed);
+ Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ m1 = signed ? context.SignExtend32(OperandType.I64, m1) : context.ZeroExtend32(OperandType.I64, m1);
+ m2 = signed ? context.SignExtend32(OperandType.I64, m2) : context.ZeroExtend32(OperandType.I64, m2);
+ }
+
+ res = EmitVectorInsert(context, res, emit(m1, m2), id + index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorPairwiseTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ Operand res = GetVecA32(op.Qd);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index * 2;
+ Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed);
+ Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ m1 = signed ? context.SignExtend32(OperandType.I64, m1) : context.ZeroExtend32(OperandType.I64, m1);
+ m2 = signed ? context.SignExtend32(OperandType.I64, m2) : context.ZeroExtend32(OperandType.I64, m2);
+ }
+
+ Operand d1 = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size + 1, signed);
+
+ res = EmitVectorInsert(context, res, emit(m1, m2, d1), op.Id + index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Narrow
+
+ public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int elems = 8 >> op.Size; // Size is the target (narrowed) element size, so the destination doubleword holds 8 >> Size elements.
+
+ Operand res = GetVecA32(op.Qd);
+ int id = (op.Vd & 1) << (3 - op.Size); // Target doubleword base.
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, signed);
+
+ res = EmitVectorInsert(context, res, emit(m), id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ // Intrinsic Helpers
+
+ public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
+ {
+ Debug.Assert(input.Type == OperandType.V128);
+
+ int originalSide = originalV & 1;
+ int targetSide = targetV & 1;
+
+ if (originalSide == targetSide)
+ {
+ return input;
+ }
+
+ if (targetSide == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Movlhps, input, input); // Low to high.
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Movhlps, input, input); // High to low.
+ }
+ }
+
+ public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ int targetSide = targetV & 1;
+ int shuffleMask = 2;
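+ // Shufpd mask 2 (bits b1:b0 = 10) takes the low doubleword from the first operand
+ // and the high doubleword from the second.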
+
+ if (targetSide == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, target, value, Const(shuffleMask));
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(shuffleMask));
+ }
+ }
+
+ public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ // Insert from index 0 in value to index in target.
+ int index = reg & (doubleWidth ? 1 : 3);
+
+ if (doubleWidth)
+ {
+ if (index == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Movlhps, target, value); // Low to high.
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(2)); // Low to low, keep high from original.
+ }
+ }
+ else
+ {
+ if (Optimizations.UseSse41)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Insertps, target, value, Const(index << 4));
+ }
+ else
+ {
+ target = EmitSwapScalar(context, target, index, doubleWidth); // Swap value to replace into element 0.
+ target = context.AddIntrinsic(Intrinsic.X86Movss, target, value); // Move the value into element 0 of the vector.
+ return EmitSwapScalar(context, target, index, doubleWidth); // Swap new value back to the correct index.
+ }
+ }
+ }
+
+ public static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
+ {
+ // Swap the element at `index` with element 0. This swap happens at the start of an A32 scalar op when required.
+ int index = reg & (doubleWidth ? 1 : 3);
+ if (index == 0)
+ {
+ return target;
+ }
+
+ if (doubleWidth)
+ {
+ int shuffleMask = 1; // Swap top and bottom. (b0 = 1, b1 = 0)
+ return context.AddIntrinsic(Intrinsic.X86Shufpd, target, target, Const(shuffleMask));
+ }
+ else
+ {
+ int shuffleMask = (3 << 6) | (2 << 4) | (1 << 2) | index; // Select element `index` into lane 0; other lanes keep their positions.
+ shuffleMask &= ~(3 << (index * 2)); // Clear lane `index`'s selector so it picks element 0, completing the swap.
+
+ return context.AddIntrinsic(Intrinsic.X86Shufps, target, target, Const(shuffleMask));
+ }
+ }
+
+ // Vector Operand Templates
+
+ public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
+ }
+
+ public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (side == -1)
+ {
+ side = op.Vd;
+ }
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, side);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, side);
+ }
+
+ Operand res = vectorFunc(n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ if (side != op.Vd)
+ {
+ res = EmitMoveDoubleWordToSide(context, res, side, op.Vd);
+ }
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialD = d;
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(d, n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
+ }
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
+ Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
+
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(inst1, n, m);
+ return context.AddIntrinsic(inst2, d, res);
+ });
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Debug.Assert((op.Size & 1) == 0);
+
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ return context.AddIntrinsic(inst32, d, n, m);
+ });
+ }
+
+ public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ m = EmitSwapScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m));
+ }
+
+ public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ n = EmitSwapScalar(context, n, op.Vn, doubleSize);
+ m = EmitSwapScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+
+ EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+ Operand initialD = d;
+
+ n = EmitSwapScalar(context, n, op.Vn, doubleSize);
+ m = EmitSwapScalar(context, m, op.Vm, doubleSize);
+ d = EmitSwapScalar(context, d, op.Vd, doubleSize);
+
+ Operand res = scalarFunc(d, n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+
+ Intrinsic inst = doubleSize ? inst64 : inst32;
+
+ EmitScalarTernaryOpSimd32(context, (d, n, m) =>
+ {
+ return context.AddIntrinsic(inst, d, n, m);
+ });
+ }
+
+ public static void EmitScalarTernaryOpF32(
+ ArmEmitterContext context,
+ Intrinsic inst32pt1,
+ Intrinsic inst64pt1,
+ Intrinsic inst32pt2,
+ Intrinsic inst64pt2,
+ bool isNegD = false)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+
+ Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
+ Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;
+
+ EmitScalarTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(inst1, n, m);
+
+ if (isNegD)
+ {
+ Operand mask = doubleSize
+ ? X86GetScalar(context, -0d)
+ : X86GetScalar(context, -0f);
+
+ d = doubleSize
+ ? context.AddIntrinsic(Intrinsic.X86Xorpd, mask, d)
+ : context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
+ }
+
+ return context.AddIntrinsic(inst2, d, res);
+ });
+ }
+
+ // By Scalar
+
+ public static void EmitVectorByScalarOpSimd32(ArmEmitterContext context, Func2I vectorFunc)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand d = GetVecA32(op.Qd);
+
+ int index = op.Vm & 3;
+ int dupeMask = (index << 6) | (index << 4) | (index << 2) | index;
+ Operand m = GetVecA32(op.Vm >> 2);
+ m = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(dupeMask));
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ }
+
+ Operand res = vectorFunc(n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
+ EmitVectorByScalarOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorsByScalarOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialD = d;
+
+ int index = op.Vm & 3;
+ int dupeMask = (index << 6) | (index << 4) | (index << 2) | index;
+ Operand m = GetVecA32(op.Vm >> 2);
+ m = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(dupeMask));
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ }
+
+ Operand res = vectorFunc(d, n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
+ }
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
+ Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;
+
+ EmitVectorsByScalarOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(inst1, n, m);
+ return context.AddIntrinsic(inst2, d, res);
+ });
+ }
+
+ // Pairwise
+
+ public static void EmitSse2VectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
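+ // Unpcklps interleaves the low halves of n and m: [n0, m0, n1, m1]. Movhlps then
+ // brings the high pair down, so the element-wise op yields [op(n0,n1), op(m0,m1), ...].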
+ Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);
+
+ Operand part0 = unpck;
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, unpck, unpck);
+
+ return context.AddIntrinsic(inst32, part0, part1);
+ }, 0);
+ }
+
+ public static void EmitSsse3VectorPairwiseOp32(ArmEmitterContext context, Intrinsic[] inst)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
+ Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks[op.Size]);
+
+ Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n
+ Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n
+
+ return context.AddIntrinsic(inst[op.Size], left, right);
+ }
+ else if (op.Size < 3)
+ {
+ Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);
+
+ Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n
+ Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m
+
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM);
+
+ return context.AddIntrinsic(inst[op.Size], left, right);
+ }
+ else
+ {
+ Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
+ Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);
+
+ return context.AddIntrinsic(inst[3], left, right);
+ }
+ }, 0);
+ }
+
+ // Generic Functions
+
+ public static Operand EmitSoftFloatCallDefaultFpscr(ArmEmitterContext context, string name, params Operand[] callArgs)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ MethodInfo info = (op.Size & 1) == 0
+ ? typeof(SoftFloat32).GetMethod(name)
+ : typeof(SoftFloat64).GetMethod(name);
+
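+ // Append one extra argument; judging by the method name, it selects the
+ // default-FPSCR variant of the softfloat routine.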
+ Array.Resize(ref callArgs, callArgs.Length + 1);
+ callArgs[^1] = Const(1);
+
+ context.ExitArmFpMode();
+ context.StoreToContext();
+ Operand res = context.Call(info, callArgs);
+ context.LoadFromContext();
+ context.EnterArmFpMode();
+
+ return res;
+ }
+
+ public static Operand EmitVectorExtractSx32(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract32(context, reg, index, size, true);
+ }
+
+ public static Operand EmitVectorExtractZx32(ArmEmitterContext context, int reg, int index, int size)
+ {
+ return EmitVectorExtract32(context, reg, index, size, false);
+ }
+
+ public static Operand EmitVectorExtract32(ArmEmitterContext context, int reg, int index, int size, bool signed)
+ {
+ ThrowIfInvalid(index, size);
+
+ Operand res = default;
+
+ switch (size)
+ {
+ case 0:
+ res = context.VectorExtract8(GetVec(reg), index);
+ break;
+
+ case 1:
+ res = context.VectorExtract16(GetVec(reg), index);
+ break;
+
+ case 2:
+ res = context.VectorExtract(OperandType.I32, GetVec(reg), index);
+ break;
+
+ case 3:
+ res = context.VectorExtract(OperandType.I64, GetVec(reg), index);
+ break;
+ }
+
+ if (signed)
+ {
+ switch (size)
+ {
+ case 0:
+ res = context.SignExtend8(OperandType.I32, res);
+ break;
+ case 1:
+ res = context.SignExtend16(OperandType.I32, res);
+ break;
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 0:
+ res = context.ZeroExtend8(OperandType.I32, res);
+ break;
+ case 1:
+ res = context.ZeroExtend16(OperandType.I32, res);
+ break;
+ }
+ }
+
+ return res;
+ }
+
+ public static Operand EmitPolynomialMultiply(ArmEmitterContext context, Operand op1, Operand op2, int eSize)
+ {
+ Debug.Assert(eSize <= 32);
+
+ Operand result = eSize == 32 ? Const(0L) : Const(0);
+
+ if (eSize == 32)
+ {
+ op1 = context.ZeroExtend32(OperandType.I64, op1);
+ op2 = context.ZeroExtend32(OperandType.I64, op2);
+ }
+
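+ // Carry-less (polynomial) multiply: XOR the partial products. When bit i of op1 is
+ // set, op2 * (op1 & (1 << i)) equals op2 << i; otherwise the term contributes zero.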
+ for (int i = 0; i < eSize; i++)
+ {
+ Operand mask = context.BitwiseAnd(op1, Const(op1.Type, 1L << i));
+
+ result = context.BitwiseExclusiveOr(result, context.Multiply(op2, mask));
+ }
+
+ return result;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
new file mode 100644
index 0000000..568c071
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs
@@ -0,0 +1,372 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;
+ using Func2I = Func<Operand, Operand, Operand>;
+ using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+ static class InstEmitSimdHelper32Arm64
+ {
+ // Intrinsic Helpers
+
+ public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
+ {
+ Debug.Assert(input.Type == OperandType.V128);
+
+ int originalSide = originalV & 1;
+ int targetSide = targetV & 1;
+
+ if (originalSide == targetSide)
+ {
+ return input;
+ }
+
+ Intrinsic vType = Intrinsic.Arm64VDWord | Intrinsic.Arm64V128;
+
+ if (targetSide == 1)
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 0)); // Low to high.
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 1)); // High to low.
+ }
+ }
+
+ public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ int targetSide = targetV & 1;
+ Operand idx = Const(targetSide);
+
+ return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, idx, value, idx);
+ }
+
+ public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
+ {
+ Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
+
+ // Insert from index 0 in value to index in target.
+ int index = reg & (doubleWidth ? 1 : 3);
+
+ if (doubleWidth)
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, Const(index), value, Const(0));
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VWord, target, Const(index), value, Const(0));
+ }
+ }
+
+ public static Operand EmitExtractScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
+ {
+ int index = reg & (doubleWidth ? 1 : 3);
+ if (index == 0)
+ {
+ return target; // Element is already at index 0, so just return the vector directly.
+ }
+
+ if (doubleWidth)
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VDWord, target, Const(1)); // Extract high (index 1).
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VWord, target, Const(index)); // Extract element at index.
+ }
+ }
+
+ // Vector Operand Templates
+
+ public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
+ }
+
+ public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+
+ if (side == -1)
+ {
+ side = op.Vd;
+ }
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, side);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, side);
+ }
+
+ Operand res = vectorFunc(n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ if (side != op.Vd)
+ {
+ res = EmitMoveDoubleWordToSide(context, res, side, op.Vd);
+ }
+ res = EmitDoubleWordInsert(context, d, res, op.Vd);
+ }
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand n = GetVecA32(op.Qn);
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialD = d;
+
+ if (!op.Q) // Register swap: move relevant doubleword to destination side.
+ {
+ n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
+ }
+
+ Operand res = vectorFunc(d, n, m);
+
+ if (!op.Q) // Register insert.
+ {
+ res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
+ }
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc, bool doubleSize)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ int shift = doubleSize ? 1 : 2;
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ EmitScalarUnaryOpF32(context, inst, (op.Size & 1) != 0);
+ }
+
+ public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst, bool doubleSize)
+ {
+ bool identity = inst == 0; // Test before OR-ing in the size flags below, which would make the test always false.
+ inst |= (doubleSize ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitScalarUnaryOpSimd32(context, (m) => identity ? m : context.AddIntrinsic(inst, m), doubleSize);
+ }
+
+ public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+
+ n = EmitExtractScalar(context, n, op.Vn, doubleSize);
+ m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+ Operand res = scalarFunc(n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
+
+ context.Copy(d, res);
+ }
+
+ public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vn >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand d = GetVecA32(op.Vd >> shift);
+ Operand initialD = d;
+
+ n = EmitExtractScalar(context, n, op.Vn, doubleSize);
+ m = EmitExtractScalar(context, m, op.Vm, doubleSize);
+ d = EmitExtractScalar(context, d, op.Vd, doubleSize);
+
+ Operand res = scalarFunc(d, n, m);
+
+ // Insert scalar into vector.
+ res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);
+
+ context.Copy(initialD, res);
+ }
+
+ public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
+
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+ EmitScalarTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
+ }
+
+ // Pairwise
+
+ public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ inst32 |= Intrinsic.Arm64V64 | Intrinsic.Arm64VFloat;
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst32, n, m), 0);
+ }
+
+ public static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+
+ bool cmpWithZero = (op.Opc & 2) != 0;
+
+ Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
+ inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vd >> shift);
+ Operand m = GetVecA32(op.Vm >> shift);
+
+ n = EmitExtractScalar(context, n, op.Vd, doubleSize);
+ m = cmpWithZero ? Const(0) : EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+ Operand nzcv = context.AddIntrinsicInt(inst, n, m);
+
+ Operand one = Const(1);
+
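+ // The comparison result uses the AArch64 NZCV layout: V, C, Z and N live in bits 28 through 31.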
+ SetFpFlag(context, FPState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
+ SetFpFlag(context, FPState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
+ SetFpFlag(context, FPState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
+ SetFpFlag(context, FPState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
+ }
+
+ public static void EmitCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ Intrinsic inst;
+ if (zero)
+ {
+ inst = cond switch
+ {
+ CmpCondition.Equal => Intrinsic.Arm64FcmeqVz,
+ CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtVz,
+ CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeVz,
+ CmpCondition.LessThan => Intrinsic.Arm64FcmltVz,
+ CmpCondition.LessThanOrEqual => Intrinsic.Arm64FcmleVz,
+ _ => throw new InvalidOperationException(),
+ };
+ }
+ else
+ {
+ inst = cond switch
+ {
+ CmpCondition.Equal => Intrinsic.Arm64FcmeqV,
+ CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtV,
+ CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeV,
+ _ => throw new InvalidOperationException(),
+ };
+ }
+
+ inst |= (sizeF != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
+
+ if (zero)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(inst, m);
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ return context.AddIntrinsic(inst, n, m);
+ });
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs
new file mode 100644
index 0000000..70dfc0f
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs
@@ -0,0 +1,720 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitSimdHelperArm64
+ {
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitScalarUnaryOpFFromGp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitScalarUnaryOpFToGp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt(inst, n)
+ : context.AddIntrinsicLong(inst, n));
+ }
+
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+ }
+
+ public static void EmitScalarTernaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+ Operand a = GetVec(op.Ra);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, a, n, m));
+ }
+
+ public static void EmitScalarTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+ }
+
+ public static void EmitScalarUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
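+ // Pack the integer element size into the size field of the intrinsic.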
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitScalarBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitScalarBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+ }
+
+ public static void EmitScalarTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitScalarShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
+ }
+
+ public static void EmitScalarShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+ }
+
+ public static void EmitScalarSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand result = context.AddIntrinsic(inst, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand result = context.AddIntrinsic(inst, n, m);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand result = context.AddIntrinsic(inst, d, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitScalarConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+ }
+
+ public static void EmitScalarConvertBinaryOpFFromGp(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+ }
+
+ public static void EmitScalarConvertBinaryOpFToGp(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
+ ? context.AddIntrinsicInt(inst, n, Const(fBits))
+ : context.AddIntrinsicLong(inst, n, Const(fBits)));
+ }
+
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorBinaryOpFRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+ }
+
+ public static void EmitVectorBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorTernaryOpFRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitVectorTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
+ }
+
+ public static void EmitVectorBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
+ }
+
+ public static void EmitVectorBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
+ }
+
+ public static void EmitVectorBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m));
+ }
+
+ public static void EmitVectorTernaryOpRdByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
+ }
+
+ public static void EmitVectorShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
+ }
+
+ public static void EmitVectorShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+ }
+
+ public static void EmitVectorSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, n, m);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, d, n);
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorSaturatingBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ Operand result = context.AddIntrinsic(inst, n, m, Const(op.Index));
+
+ context.Copy(GetVec(op.Rd), result);
+
+ context.SetPendingQcFlagSync();
+ }
+
+ public static void EmitVectorConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
+ }
+
+ public static void EmitVectorLookupTable(ArmEmitterContext context, Intrinsic inst)
+ {
+ OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+
+ Operand[] operands = new Operand[op.Size + 1];
+
+ operands[op.Size] = GetVec(op.Rm);
+
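+ // The table registers must be consecutive, wrapping from V31 back to V0.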
+ for (int index = 0; index < op.Size; index++)
+ {
+ operands[index] = GetVec((op.Rn + index) & 0x1F);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ inst |= Intrinsic.Arm64V128;
+ }
+
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, operands));
+ }
+
+ public static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ bool cmpWithZero = op is not OpCodeSimdFcond && op.Bit3;
+
+ Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
+
+ if ((op.Size & 1) != 0)
+ {
+ inst |= Intrinsic.Arm64VDouble;
+ }
+
+ Operand n = GetVec(op.Rn);
+ Operand m = cmpWithZero ? Const(0) : GetVec(op.Rm);
+
+ Operand nzcv = context.AddIntrinsicInt(inst, n, m);
+
+ Operand one = Const(1);
+
+ SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
+ SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
+ SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
+ SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs
new file mode 100644
index 0000000..ace8e4c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs
@@ -0,0 +1,613 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void And_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2));
+ }
+ }
+
+ public static void Bic_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
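+ // PANDN computes ~first & second, so the operands are swapped to get n & ~m.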
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.BitwiseAnd(op1, context.BitwiseNot(op2));
+ });
+ }
+ }
+
+ public static void Bic_Vi(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand imm = eSize switch
+ {
+ 16 => X86GetAllElements(context, (short)~op.Immediate),
+ 32 => X86GetAllElements(context, (int)~op.Immediate),
+ _ => throw new InvalidOperationException($"Invalid element size {eSize}."),
+ };
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pand, d, imm);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorImmBinaryOp(context, (op1, op2) =>
+ {
+ return context.BitwiseAnd(op1, context.BitwiseNot(op2));
+ });
+ }
+ }
+
+ public static void Bif_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV);
+ }
+ else
+ {
+ EmitBifBit(context, notRm: true);
+ }
+ }
+
+ public static void Bit_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV);
+ }
+ else
+ {
+ EmitBifBit(context, notRm: false);
+ }
+ }
+
+ private static void EmitBifBit(ArmEmitterContext context, bool notRm)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
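+ // Bitwise insert: d ^ ((n ^ d) & mask) takes bits from n where the mask is set and keeps d elsewhere.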
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
+
+ if (notRm)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, m, res);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pand, m, res);
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand d = EmitVectorExtractZx(context, op.Rd, index, 3);
+ Operand n = EmitVectorExtractZx(context, op.Rn, index, 3);
+ Operand m = EmitVectorExtractZx(context, op.Rm, index, 3);
+
+ if (notRm)
+ {
+ m = context.BitwiseNot(m);
+ }
+
+ Operand e = context.BitwiseExclusiveOr(d, n);
+
+ e = context.BitwiseAnd(e, m);
+ e = context.BitwiseExclusiveOr(e, d);
+
+ res = EmitVectorInsert(context, res, e, index, 3);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Bsl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
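+ // Bitwise select: m ^ ((n ^ m) & d) picks bits from n where d is set and from m elsewhere.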
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
+ res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+ {
+ return context.BitwiseExclusiveOr(
+ context.BitwiseAnd(op1,
+ context.BitwiseExclusiveOr(op2, op3)), op3);
+ });
+ }
+ }
+
+ public static void Eor_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
+ }
+ }
+
+ public static void Not_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAvx512Ortho)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
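+ // VPTERNLOGD truth table 0x55 (the low byte of ~0b10101010) yields the bitwise NOT of the source.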
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, n, Const(~0b10101010));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
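+ // PANDN against an all-ones mask: ~n & all-ones == ~n.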
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1));
+ }
+ }
+
+ public static void Orn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV);
+ }
+ else if (Optimizations.UseAvx512Ortho)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
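+ // A single VPTERNLOGD whose truth-table immediate combines OR with NOT implements ORN directly.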
+ Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand mask = X86GetAllElements(context, -1L);
+
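+ // Compute ~m via PANDN with an all-ones mask, then OR with n.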
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, n);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) =>
+ {
+ return context.BitwiseOr(op1, context.BitwiseNot(op2));
+ });
+ }
+ }
+
+ public static void Orr_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2));
+ }
+ }
+
+ public static void Orr_Vi(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand imm = eSize switch
+ {
+ 16 => X86GetAllElements(context, (short)op.Immediate),
+ 32 => X86GetAllElements(context, (int)op.Immediate),
+ _ => throw new InvalidOperationException($"Invalid element size {eSize}."),
+ };
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, d, imm);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2));
+ }
+ }
+
+ public static void Rbit_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (Optimizations.UseGfni)
+ {
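+ // An affine transform by the reversed identity matrix reverses the bit order of every byte.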
+ const long BitMatrix =
+ (0b10000000L << 56) |
+ (0b01000000L << 48) |
+ (0b00100000L << 40) |
+ (0b00010000L << 32) |
+ (0b00001000L << 24) |
+ (0b00000100L << 16) |
+ (0b00000010L << 8) |
+ (0b00000001L << 0);
+
+ Operand vBitMatrix = X86GetAllElements(context, BitMatrix);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, GetVec(op.Rn), vBitMatrix, Const(0));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);
+
+ Operand de = EmitReverseBits8Op(context, ne);
+
+ res = EmitVectorInsert(context, res, de, index, 0);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static Operand EmitReverseBits8Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
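+ // Classic bit reversal: swap adjacent bits, then adjacent 2-bit pairs, then the two nibbles.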
+ Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaul)), Const(1)),
+ context.ShiftLeft(context.BitwiseAnd(op, Const(0x55ul)), Const(1)));
+
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccul)), Const(2)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x33ul)), Const(2)));
+
+ return context.BitwiseOr(context.ShiftRightUI(val, Const(4)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x0ful)), Const(4)));
+ }
+
+ public static void Rev16_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
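+ // PSHUFB mask that swaps the bytes of every 16-bit element.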
+ const long MaskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0;
+ const long MaskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0;
+
+ Operand mask = X86GetScalar(context, MaskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 1);
+ }
+ }
+
+ public static void Rev32_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask;
+
+ if (op.Size == 0)
+ {
+ const long MaskE0 = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0;
+ const long MaskE1 = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0;
+
+ mask = X86GetScalar(context, MaskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);
+ }
+ else /* if (op.Size == 1) */
+ {
+ const long MaskE0 = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0;
+ const long MaskE1 = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0;
+
+ mask = X86GetScalar(context, MaskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 2);
+ }
+ }
+
+ public static void Rev64_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSsse3)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Operand mask;
+
+ if (op.Size == 0)
+ {
+ const long MaskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0;
+ const long MaskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0;
+
+ mask = X86GetScalar(context, MaskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);
+ }
+ else if (op.Size == 1)
+ {
+ const long MaskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0;
+ const long MaskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0;
+
+ mask = X86GetScalar(context, MaskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);
+ }
+ else /* if (op.Size == 2) */
+ {
+ const long MaskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0;
+ const long MaskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0;
+
+ mask = X86GetScalar(context, MaskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitRev_V(context, containerSize: 3);
+ }
+ }
+
+ private static void EmitRev_V(ArmEmitterContext context, int containerSize)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
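+ // XOR-ing an element index with this mask reverses the element order within each container.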
+ int containerMask = (1 << (containerSize - op.Size)) - 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ int revIndex = index ^ containerMask;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, revIndex, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
new file mode 100644
index 0000000..26d0934
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
@@ -0,0 +1,278 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vand_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64AndV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pand, n, m));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2));
+ }
+ }
+
+ public static void Vbic_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64BicV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pandn, m, n));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, context.BitwiseNot(op2)));
+ }
+ }
+
+ public static void Vbic_II(ArmEmitterContext context)
+ {
+ OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp;
+
+ long immediate = op.Immediate;
+
+ // Replicate the immediate to fill all 64 bits when the element size is smaller than 64 bits.
+ switch (op.Size)
+ {
+ case 0:
+ immediate *= 0x0101010101010101L;
+ break;
+ case 1:
+ immediate *= 0x0001000100010001L;
+ break;
+ case 2:
+ immediate *= 0x0000000100000001L;
+ break;
+ }
+
+ Operand imm = Const(immediate);
+ Operand res = GetVecA32(op.Qd);
+
+ if (op.Q)
+ {
+ for (int elem = 0; elem < 2; elem++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, elem, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseAnd(de, context.BitwiseNot(imm)), elem, 3);
+ }
+ }
+ else
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, op.Vd & 1, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseAnd(de, context.BitwiseNot(imm)), op.Vd & 1, 3);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vbif(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BifV | Intrinsic.Arm64V128, d, n, m));
+ }
+ else
+ {
+ EmitBifBit(context, true);
+ }
+ }
+
+ public static void Vbit(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BitV | Intrinsic.Arm64V128, d, n, m));
+ }
+ else
+ {
+ EmitBifBit(context, false);
+ }
+ }
+
+ public static void Vbsl(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BslV | Intrinsic.Arm64V128, d, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
+ return context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
+ });
+ }
+ else
+ {
+ EmitVectorTernaryOpZx32(context, (op1, op2, op3) =>
+ {
+ return context.BitwiseExclusiveOr(
+ context.BitwiseAnd(op1,
+ context.BitwiseExclusiveOr(op2, op3)), op3);
+ });
+ }
+ }
+
+ public static void Veor_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64EorV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pxor, n, m));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
+ }
+ }
+
+ public static void Vorn_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseAvx512Ortho)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ return context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));
+ });
+ }
+ else if (Optimizations.UseSse2)
+ {
+ Operand mask = context.VectorOne();
+
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask);
+ return context.AddIntrinsic(Intrinsic.X86Por, n, m);
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, context.BitwiseNot(op2)));
+ }
+ }
+
+ public static void Vorr_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrrV | Intrinsic.Arm64V128, n, m));
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Por, n, m));
+ }
+ else
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
+ }
+ }
+
+ public static void Vorr_II(ArmEmitterContext context)
+ {
+ OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp;
+
+ long immediate = op.Immediate;
+
+ // Replicate the immediate to fill all 64 bits when the element size is smaller than 64 bits.
+ switch (op.Size)
+ {
+ case 0:
+ immediate *= 0x0101010101010101L;
+ break;
+ case 1:
+ immediate *= 0x0001000100010001L;
+ break;
+ case 2:
+ immediate *= 0x0000000100000001L;
+ break;
+ }
+
+ Operand imm = Const(immediate);
+ Operand res = GetVecA32(op.Qd);
+
+ if (op.Q)
+ {
+ for (int elem = 0; elem < 2; elem++)
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, elem, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), elem, 3);
+ }
+ }
+ else
+ {
+ Operand de = EmitVectorExtractZx(context, op.Qd, op.Vd & 1, 3);
+
+ res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), op.Vd & 1, 3);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vtst(ArmEmitterContext context)
+ {
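+ // VTST: an element becomes all-ones when (n & m) is non-zero, all-zeros otherwise.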
+ EmitVectorBinaryOpZx32(context, (op1, op2) =>
+ {
+ Operand isZero = context.ICompareEqual(context.BitwiseAnd(op1, op2), Const(0));
+ return context.ConditionalSelect(isZero, Const(0), Const(-1));
+ });
+ }
+
+ private static void EmitBifBit(ArmEmitterContext context, bool notRm)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ EmitVectorTernaryOpSimd32(context, (d, n, m) =>
+ {
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
+ res = context.AddIntrinsic((notRm) ? Intrinsic.X86Pandn : Intrinsic.X86Pand, m, res);
+ return context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
+ });
+ }
+ else
+ {
+ EmitVectorTernaryOpZx32(context, (d, n, m) =>
+ {
+ if (notRm)
+ {
+ m = context.BitwiseNot(m);
+ }
+ return context.BitwiseExclusiveOr(
+ context.BitwiseAnd(m,
+ context.BitwiseExclusiveOr(d, n)), d);
+ });
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs b/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs
new file mode 100644
index 0000000..dedf0fa
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs
@@ -0,0 +1,162 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Ld__Vms(ArmEmitterContext context)
+ {
+ EmitSimdMemMs(context, isLoad: true);
+ }
+
+ public static void Ld__Vss(ArmEmitterContext context)
+ {
+ EmitSimdMemSs(context, isLoad: true);
+ }
+
+ public static void St__Vms(ArmEmitterContext context)
+ {
+ EmitSimdMemMs(context, isLoad: false);
+ }
+
+ public static void St__Vss(ArmEmitterContext context)
+ {
+ EmitSimdMemSs(context, isLoad: false);
+ }
+
+ private static void EmitSimdMemMs(ArmEmitterContext context, bool isLoad)
+ {
+ OpCodeSimdMemMs op = (OpCodeSimdMemMs)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ long offset = 0;
+
+#pragma warning disable IDE0055 // Disable formatting
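+ // Walk memory linearly while spreading elements across the (Rt + rep + sElem) registers, de/interleaving the structures.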
+ for (int rep = 0; rep < op.Reps; rep++)
+ for (int elem = 0; elem < op.Elems; elem++)
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ int rtt = (op.Rt + rep + sElem) & 0x1f;
+
+ Operand tt = GetVec(rtt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ if (isLoad)
+ {
+ EmitLoadSimd(context, address, tt, rtt, elem, op.Size);
+
+ if (op.RegisterSize == RegisterSize.Simd64 && elem == op.Elems - 1)
+ {
+ context.Copy(tt, context.VectorZeroUpper64(tt));
+ }
+ }
+ else
+ {
+ EmitStoreSimd(context, address, rtt, elem, op.Size);
+ }
+
+ offset += 1 << op.Size;
+ }
+#pragma warning restore IDE0055
+
+ if (op.WBack)
+ {
+ EmitSimdMemWBack(context, offset);
+ }
+ }
+
+ private static void EmitSimdMemSs(ArmEmitterContext context, bool isLoad)
+ {
+ OpCodeSimdMemSs op = (OpCodeSimdMemSs)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+
+ long offset = 0;
+
+ if (op.Replicate)
+ {
+ // Only loads use the replicate mode.
+ Debug.Assert(isLoad, "Replicate mode is not valid for stores.");
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ int rt = (op.Rt + sElem) & 0x1f;
+
+ Operand t = GetVec(rt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ for (int index = 0; index < elems; index++)
+ {
+ EmitLoadSimd(context, address, t, rt, index, op.Size);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ context.Copy(t, context.VectorZeroUpper64(t));
+ }
+
+ offset += 1 << op.Size;
+ }
+ }
+ else
+ {
+ for (int sElem = 0; sElem < op.SElems; sElem++)
+ {
+ int rt = (op.Rt + sElem) & 0x1f;
+
+ Operand t = GetVec(rt);
+
+ Operand address = context.Add(n, Const(offset));
+
+ if (isLoad)
+ {
+ EmitLoadSimd(context, address, t, rt, op.Index, op.Size);
+ }
+ else
+ {
+ EmitStoreSimd(context, address, rt, op.Index, op.Size);
+ }
+
+ offset += 1 << op.Size;
+ }
+ }
+
+ if (op.WBack)
+ {
+ EmitSimdMemWBack(context, offset);
+ }
+ }
+
+ private static void EmitSimdMemWBack(ArmEmitterContext context, long offset)
+ {
+ OpCodeMemReg op = (OpCodeMemReg)context.CurrOp;
+
+ Operand n = GetIntOrSP(context, op.Rn);
+ Operand m;
+
+ if (op.Rm != RegisterAlias.Zr)
+ {
+ m = GetIntOrZR(context, op.Rm);
+ }
+ else
+ {
+ m = Const(offset);
+ }
+
+ context.Copy(n, context.Add(n, m));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs b/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs
new file mode 100644
index 0000000..35c6dd3
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs
@@ -0,0 +1,352 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vld1(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 1, true);
+ }
+
+ public static void Vld2(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 2, true);
+ }
+
+ public static void Vld3(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 3, true);
+ }
+
+ public static void Vld4(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 4, true);
+ }
+
+ public static void Vst1(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 1, false);
+ }
+
+ public static void Vst2(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 2, false);
+ }
+
+ public static void Vst3(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 3, false);
+ }
+
+ public static void Vst4(ArmEmitterContext context)
+ {
+ EmitVStoreOrLoadN(context, 4, false);
+ }
+
+ public static void EmitVStoreOrLoadN(ArmEmitterContext context, int count, bool load)
+ {
+ if (context.CurrOp is OpCode32SimdMemSingle)
+ {
+ OpCode32SimdMemSingle op = (OpCode32SimdMemSingle)context.CurrOp;
+
+ int eBytes = 1 << op.Size;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ // TODO: Check alignment.
+ int offset = 0;
+ int d = op.Vd;
+
+ for (int i = 0; i < count; i++)
+ {
+ // Accesses an element from a double SIMD register.
+ Operand address = context.Add(n, Const(offset));
+ if (eBytes == 8)
+ {
+ if (load)
+ {
+ EmitDVectorLoad(context, address, d);
+ }
+ else
+ {
+ EmitDVectorStore(context, address, d);
+ }
+ }
+ else
+ {
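+ // Map the D register element onto its backing Q register; odd D registers use the upper half.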
+ int index = ((d & 1) << (3 - op.Size)) + op.Index;
+ if (load)
+ {
+ if (op.Replicate)
+ {
+ int regs = (count > 1) ? 1 : op.Increment;
+ for (int reg = 0; reg < regs; reg++)
+ {
+ int dreg = reg + d;
+ int rIndex = ((dreg & 1) << (3 - op.Size));
+ int limit = rIndex + (1 << (3 - op.Size));
+
+ while (rIndex < limit)
+ {
+ EmitLoadSimd(context, address, GetVecA32(dreg >> 1), dreg >> 1, rIndex++, op.Size);
+ }
+ }
+ }
+ else
+ {
+ EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size);
+ }
+ }
+ else
+ {
+ EmitStoreSimd(context, address, d >> 1, index, op.Size);
+ }
+ }
+ offset += eBytes;
+ d += op.Increment;
+ }
+
+ if (op.WBack)
+ {
+ if (op.RegisterIndex)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+ SetIntA32(context, op.Rn, context.Add(n, m));
+ }
+ else
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(count * eBytes)));
+ }
+ }
+ }
+ else
+ {
+ OpCode32SimdMemPair op = (OpCode32SimdMemPair)context.CurrOp;
+
+ int increment = count > 1 ? op.Increment : 1;
+ int eBytes = 1 << op.Size;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+ int offset = 0;
+ int d = op.Vd;
+
+ for (int reg = 0; reg < op.Regs; reg++)
+ {
+ for (int elem = 0; elem < op.Elems; elem++)
+ {
+ int elemD = d + reg;
+ for (int i = 0; i < count; i++)
+ {
+ // Accesses an element from a double SIMD register,
+ // advancing the offset by eBytes for each element.
+ Operand address = context.Add(n, Const(offset));
+ int index = ((elemD & 1) << (3 - op.Size)) + elem;
+ if (eBytes == 8)
+ {
+ if (load)
+ {
+ EmitDVectorLoad(context, address, elemD);
+ }
+ else
+ {
+ EmitDVectorStore(context, address, elemD);
+ }
+ }
+ else
+ {
+ if (load)
+ {
+ EmitLoadSimd(context, address, GetVecA32(elemD >> 1), elemD >> 1, index, op.Size);
+ }
+ else
+ {
+ EmitStoreSimd(context, address, elemD >> 1, index, op.Size);
+ }
+ }
+
+ offset += eBytes;
+ elemD += increment;
+ }
+ }
+ }
+
+ if (op.WBack)
+ {
+ if (op.RegisterIndex)
+ {
+ Operand m = GetIntA32(context, op.Rm);
+ SetIntA32(context, op.Rn, context.Add(n, m));
+ }
+ else
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(count * 8 * op.Regs)));
+ }
+ }
+ }
+ }
+
+ public static void Vldm(ArmEmitterContext context)
+ {
+ OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ bool writeBack = op.PostOffset != 0;
+
+ if (writeBack)
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ int range = op.RegisterRange;
+
+ int sReg = (op.DoubleWidth) ? (op.Vd << 1) : op.Vd;
+ int offset = 0;
+ int byteSize = 4;
+
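+ // Four S registers share each Q register: sReg >> 2 selects the vector, sReg & 3 the word element.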
+ for (int num = 0; num < range; num++, sReg++)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+ Operand vec = GetVecA32(sReg >> 2);
+
+ EmitLoadSimd(context, address, vec, sReg >> 2, sReg & 3, WordSizeLog2);
+ offset += byteSize;
+ }
+ }
+
+ public static void Vstm(ArmEmitterContext context)
+ {
+ OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ Operand baseAddress = context.Add(n, Const(op.Offset));
+
+ bool writeBack = op.PostOffset != 0;
+
+ if (writeBack)
+ {
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset)));
+ }
+
+ int offset = 0;
+
+ int range = op.RegisterRange;
+ int sReg = (op.DoubleWidth) ? (op.Vd << 1) : op.Vd;
+ int byteSize = 4;
+
+ for (int num = 0; num < range; num++, sReg++)
+ {
+ Operand address = context.Add(baseAddress, Const(offset));
+
+ EmitStoreSimd(context, address, sReg >> 2, sReg & 3, WordSizeLog2);
+
+ offset += byteSize;
+ }
+ }
+
+ public static void Vldr(ArmEmitterContext context)
+ {
+ EmitVLoadOrStore(context, AccessType.Load);
+ }
+
+ public static void Vstr(ArmEmitterContext context)
+ {
+ EmitVLoadOrStore(context, AccessType.Store);
+ }
+
+ private static void EmitDVectorStore(ArmEmitterContext context, Operand address, int vecD)
+ {
+ int vecQ = vecD >> 1;
+ int vecSElem = (vecD & 1) << 1;
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
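+ // With the E flag set (big-endian data), the two words of the doubleword are stored swapped.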
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ EmitStoreSimd(context, address, vecQ, vecSElem, WordSizeLog2);
+ EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem | 1, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ EmitStoreSimd(context, address, vecQ, vecSElem | 1, WordSizeLog2);
+ EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitDVectorLoad(ArmEmitterContext context, Operand address, int vecD)
+ {
+ int vecQ = vecD >> 1;
+ int vecSElem = (vecD & 1) << 1;
+ Operand vec = GetVecA32(vecQ);
+
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ EmitLoadSimd(context, address, vec, vecQ, vecSElem, WordSizeLog2);
+ EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem | 1, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ EmitLoadSimd(context, address, vec, vecQ, vecSElem | 1, WordSizeLog2);
+ EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitVLoadOrStore(ArmEmitterContext context, AccessType accType)
+ {
+ OpCode32SimdMemImm op = (OpCode32SimdMemImm)context.CurrOp;
+
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+ Operand m = GetMemM(context, setCarry: false);
+
+ Operand address = op.Add
+ ? context.Add(n, m)
+ : context.Subtract(n, m);
+
+ int size = op.Size;
+
+ if ((accType & AccessType.Load) != 0)
+ {
+ if (size == DWordSizeLog2)
+ {
+ EmitDVectorLoad(context, address, op.Vd);
+ }
+ else
+ {
+ Operand vec = GetVecA32(op.Vd >> 2);
+ EmitLoadSimd(context, address, vec, op.Vd >> 2, (op.Vd & 3) << (2 - size), size);
+ }
+ }
+ else
+ {
+ if (size == DWordSizeLog2)
+ {
+ EmitDVectorStore(context, address, op.Vd);
+ }
+ else
+ {
+ EmitStoreSimd(context, address, op.Vd >> 2, (op.Vd & 3) << (2 - size), size);
+ }
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMove.cs b/src/ARMeilleure/Instructions/InstEmitSimdMove.cs
new file mode 100644
index 0000000..85c98fe
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMove.cs
@@ -0,0 +1,877 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Reflection;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ #region "Masks"
+ private static readonly long[] _masksE0_Uzp = new long[]
+ {
+ 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0,
+ };
+
+ private static readonly long[] _masksE1_Uzp = new long[]
+ {
+ 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
+ 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0,
+ };
+ #endregion
+
+ public static void Dup_Gp(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ if (Optimizations.UseSse2)
+ {
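+ // Replicate the element across a 32-bit lane (bytes x 0x01010101, halfwords x 0x00010001), then broadcast that lane.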
+ switch (op.Size)
+ {
+ case 0:
+ n = context.ZeroExtend8(n.Type, n);
+ n = context.Multiply(n, Const(n.Type, 0x01010101));
+ break;
+ case 1:
+ n = context.ZeroExtend16(n.Type, n);
+ n = context.Multiply(n, Const(n.Type, 0x00010001));
+ break;
+ case 2:
+ n = context.ZeroExtend32(n.Type, n);
+ break;
+ }
+
+ Operand res = context.VectorInsert(context.VectorZero(), n, 0);
+
+ if (op.Size < 3)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0));
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, n, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Dup_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), ne, 0, op.Size));
+ }
+
+ public static void Dup_V(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand res = GetVec(op.Rn);
+
+ if (op.Size == 0)
+ {
+ if (op.DstIndex != 0)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex));
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ else if (op.Size == 1)
+ {
+ if (op.DstIndex != 0)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2));
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+ }
+ else if (op.Size == 2)
+ {
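+ // Replicating DstIndex into all four 2-bit fields of the Shufps immediate broadcasts that element.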
+ int mask = op.DstIndex * 0b01010101;
+
+ res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(mask));
+ }
+ else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+ }
+ else if (op.DstIndex == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Ext_V(ArmEmitterContext context)
+ {
+ OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand nShifted = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ nShifted = context.VectorZeroUpper64(nShifted);
+ }
+
+ nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4));
+
+ Operand mShifted = GetVec(op.Rm);
+
+ mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ mShifted = context.VectorZeroUpper64(mShifted);
+ }
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int bytes = op.GetBytesCount();
+
+ int position = op.Imm4 & (bytes - 1);
+
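+ // Walk bytes starting at Imm4: sources come from Rn until the source index passes the register size, then from Rm.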
+ for (int index = 0; index < bytes; index++)
+ {
+ int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;
+
+ Operand e = EmitVectorExtractZx(context, reg, position, 0);
+
+ position = (position + 1) & (bytes - 1);
+
+ res = EmitVectorInsert(context, res, e, index, 0);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fcsel_S(ArmEmitterContext context)
+ {
+ OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
+
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond);
+
+ context.BranchIfTrue(lblTrue, isTrue);
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand me = context.VectorExtract(type, GetVec(op.Rm), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0));
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
+
+ context.MarkLabel(lblEnd);
+ }
+
+ public static void Fmov_Ftoi(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Fmov_Ftoi1(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 1, 3);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Fmov_Itof(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), n, 0, op.Size + 2));
+ }
+
+ public static void Fmov_Itof1(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(d, EmitVectorInsert(context, d, n, 1, 3));
+ }
+
+ public static void Fmov_S(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
+ }
+
+ public static void Fmov_Si(ArmEmitterContext context)
+ {
+ OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ if (op.Size == 0)
+ {
+ context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate));
+ }
+ else
+ {
+ context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
+ }
+ }
+ else
+ {
+ Operand e = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ res = EmitVectorInsert(context, res, e, 0, op.Size + 2);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fmov_Vi(ArmEmitterContext context)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Copy(GetVec(op.Rd), X86GetAllElements(context, op.Immediate));
+ }
+ else
+ {
+ context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
+ }
+ }
+ else
+ {
+ Operand e = Const(op.Immediate);
+
+ Operand res = context.VectorZero();
+
+ int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ res = EmitVectorInsert(context, res, e, index, 3);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Ins_Gp(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetIntOrZR(context, op.Rn);
+
+ context.Copy(d, EmitVectorInsert(context, d, n, op.DstIndex, op.Size));
+ }
+
+ public static void Ins_V(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand d = GetVec(op.Rd);
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size);
+
+ context.Copy(d, EmitVectorInsert(context, d, ne, op.DstIndex, op.Size));
+ }
+
+ public static void Movi_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitSse2VectorMoviMvniOp(context, not: false);
+ }
+ else
+ {
+ EmitVectorImmUnaryOp(context, (op1) => op1);
+ }
+ }
+
+ public static void Mvni_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseSse2)
+ {
+ EmitSse2VectorMoviMvniOp(context, not: true);
+ }
+ else
+ {
+ EmitVectorImmUnaryOp(context, (op1) => context.BitwiseNot(op1));
+ }
+ }
+
+ public static void Smov_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ ne = context.ZeroExtend32(OperandType.I64, ne);
+ }
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Tbl_V(ArmEmitterContext context)
+ {
+ EmitTableVectorLookup(context, isTbl: true);
+ }
+
+ public static void Tbx_V(ArmEmitterContext context)
+ {
+ EmitTableVectorLookup(context, isTbl: false);
+ }
+
+ public static void Trn1_V(ArmEmitterContext context)
+ {
+ EmitVectorTranspose(context, part: 0);
+ }
+
+ public static void Trn2_V(ArmEmitterContext context)
+ {
+ EmitVectorTranspose(context, part: 1);
+ }
+
+ public static void Umov_S(ArmEmitterContext context)
+ {
+ OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+ SetIntOrZR(context, op.Rd, ne);
+ }
+
+ public static void Uzp1_V(ArmEmitterContext context)
+ {
+ EmitVectorUnzip(context, part: 0);
+ }
+
+ public static void Uzp2_V(ArmEmitterContext context)
+ {
+ EmitVectorUnzip(context, part: 1);
+ }
+
+ public static void Xtn_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand d = GetVec(op.Rd);
+
+ Operand res = context.VectorZeroUpper64(d);
+
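+ // EvenMasks gathers the low half of each wide element; Movlhps/Movhlps places the narrowed data in the upper (XTN2) or lower (XTN) half.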
+ Operand mask = X86GetAllElements(context, EvenMasks[op.Size]);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(op.Rn), mask);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, res, res2);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+
+ res = EmitVectorInsert(context, res, ne, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+ }
+
+ public static void Zip1_V(ArmEmitterContext context)
+ {
+ EmitVectorZip(context, part: 0);
+ }
+
+ public static void Zip2_V(ArmEmitterContext context)
+ {
+ EmitVectorZip(context, part: 1);
+ }
+
+ private static void EmitSse2VectorMoviMvniOp(ArmEmitterContext context, bool not)
+ {
+ OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+
+ long imm = op.Immediate;
+
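+ // Replicate the immediate within a 32-bit lane so it can be broadcast to all elements below.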
+ switch (op.Size)
+ {
+ case 0:
+ imm *= 0x01010101;
+ break;
+ case 1:
+ imm *= 0x00010001;
+ break;
+ }
+
+ if (not)
+ {
+ imm = ~imm;
+ }
+
+ Operand mask;
+
+ if (op.Size < 3)
+ {
+ mask = X86GetAllElements(context, (int)imm);
+ }
+ else
+ {
+ mask = X86GetAllElements(context, imm);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ mask = context.VectorZeroUpper64(mask);
+ }
+
+ context.Copy(GetVec(op.Rd), mask);
+ }
+
+ private static void EmitTableVectorLookup(ArmEmitterContext context, bool isTbl)
+ {
+ OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand m = GetVec(op.Rm);
+
+ Operand res;
+
+ Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL);
+
+ // Fast path for single register table.
+ {
+ Operand n = GetVec(op.Rn);
+
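+ // Pcmpgtb flags indices above 15; OR-ing that into m sets the top bit of every out-of-range index, which Pshufb treats as "write zero".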
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
+ }
+
+ for (int index = 1; index < op.Size; index++)
+ {
+ Operand ni = GetVec((op.Rn + index) & 0x1F);
+
+ Operand idxMask = X86GetAllElements(context, 0x1010101010101010L * index);
+
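+ // Rebase the indices for table register 'index': after subtracting 16 * index, in-range bytes fall in 0..15.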
+ Operand mSubMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, idxMask);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mSubMask, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, mSubMask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+ }
+
+ if (!isTbl)
+ {
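+ // TBX: lanes whose index is out of range (above the last table byte, or negative) keep the original destination byte.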
+ Operand idxMask = X86GetAllElements(context, (0x1010101010101010L * op.Size) - 0x0101010101010101L);
+ Operand zeroMask = context.VectorZero();
+
+ Operand mPosMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, idxMask);
+ Operand mNegMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, zeroMask, m);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Por, mPosMask, mNegMask);
+
+ Operand dMask = context.AddIntrinsic(Intrinsic.X86Pand, d, mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, dMask);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand d = GetVec(op.Rd);
+
+ List<Operand> args = new();
+
+ if (!isTbl)
+ {
+ args.Add(d);
+ }
+
+ args.Add(GetVec(op.Rm));
+
+ args.Add(Const(op.RegisterSize == RegisterSize.Simd64 ? 8 : 16));
+
+ for (int index = 0; index < op.Size; index++)
+ {
+ args.Add(GetVec((op.Rn + index) & 0x1F));
+ }
+
+ MethodInfo info = null;
+
+ if (isTbl)
+ {
+ switch (op.Size)
+ {
+ case 1:
+ info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl1));
+ break;
+ case 2:
+ info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl2));
+ break;
+ case 3:
+ info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl3));
+ break;
+ case 4:
+ info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl4));
+ break;
+ }
+ }
+ else
+ {
+ switch (op.Size)
+ {
+ case 1:
+ info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx1));
+ break;
+ case 2:
+ info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx2));
+ break;
+ case 3:
+ info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx3));
+ break;
+ case 4:
+ info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx4));
+ break;
+ }
+ }
+
+ context.Copy(d, context.Call(info, args.ToArray()));
+ }
+ }
+
+ private static void EmitVectorTranspose(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks[op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size < 3)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+ }
+
+ Operand m = GetVec(op.Rm);
+
+ if (op.Size < 3)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? X86PunpcklInstruction[op.Size]
+ : X86PunpckhInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
+ res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitVectorUnzip(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks[op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size < 3)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+ }
+
+ Operand m = GetVec(op.Rm);
+
+ if (op.Size < 3)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpcklInst, n, m);
+
+ if (op.Size < 2)
+ {
+ long maskE0 = _masksE0_Uzp[op.Size];
+ long maskE1 = _masksE1_Uzp[op.Size];
+
+ Operand mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int idx = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ res = EmitVectorInsert(context, res, me, pairs + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitVectorZip(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Intrinsic punpckInst = part == 0
+ ? X86PunpcklInstruction[op.Size]
+ : X86PunpckhInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m);
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ int baseIndex = part != 0 ? pairs : 0;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);
+
+ res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
+ res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs
new file mode 100644
index 0000000..fb2641f
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@@ -0,0 +1,675 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ #region "Masks"
+ // Same as InstEmitSimdMove, as the instructions do the same thing.
+ private static readonly long[] _masksE0_Uzp = new long[]
+ {
+ 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0,
+ };
+
+ private static readonly long[] _masksE1_Uzp = new long[]
+ {
+ 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
+ 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0,
+ };
+ #endregion
+
+ public static void Vmov_I(ArmEmitterContext context)
+ {
+ EmitVectorImmUnaryOp32(context, (op1) => op1);
+ }
+
+ public static void Vmvn_I(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAvx512Ortho)
+ {
+ EmitVectorUnaryOpSimd32(context, (op1) =>
+ {
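+ // Truth table 0b01010101 (0x55) makes Vpternlogd compute ~op1 in a single instruction.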
+ return context.AddIntrinsic(Intrinsic.X86Vpternlogd, op1, op1, Const(0b01010101));
+ });
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorUnaryOpSimd32(context, (op1) =>
+ {
+ Operand mask = X86GetAllElements(context, -1L);
+ return context.AddIntrinsic(Intrinsic.X86Pandn, op1, mask);
+ });
+ }
+ else
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => context.BitwiseNot(op1));
+ }
+ }
+
+ public static void Vmvn_II(ArmEmitterContext context)
+ {
+ EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseNot(op1));
+ }
+
+ public static void Vmov_GS(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGp op = (OpCode32SimdMovGp)context.CurrOp;
+
+ Operand vec = GetVecA32(op.Vn >> 2);
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand value = context.VectorExtract(OperandType.I32, vec, op.Vn & 0x3);
+ SetIntA32(context, op.Rt, value);
+ }
+ else
+ {
+ // From general purpose.
+ Operand value = GetIntA32(context, op.Rt);
+ context.Copy(vec, context.VectorInsert(vec, value, op.Vn & 0x3));
+ }
+ }
+
+ public static void Vmov_G1(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGpElem op = (OpCode32SimdMovGpElem)context.CurrOp;
+
+ int index = op.Index + ((op.Vd & 1) << (3 - op.Size));
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand value = EmitVectorExtract32(context, op.Vd >> 1, index, op.Size, !op.U);
+ SetIntA32(context, op.Rt, value);
+ }
+ else
+ {
+ // From general purpose.
+ Operand vec = GetVecA32(op.Vd >> 1);
+ Operand value = GetIntA32(context, op.Rt);
+ context.Copy(vec, EmitVectorInsert(context, vec, value, index, op.Size));
+ }
+ }
+
+ public static void Vmov_G2(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp;
+
+ Operand vec = GetVecA32(op.Vm >> 2);
+ int vm1 = op.Vm + 1;
+ bool sameOwnerVec = (op.Vm >> 2) == (vm1 >> 2);
+ Operand vec2 = sameOwnerVec ? vec : GetVecA32(vm1 >> 2);
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand lowValue = context.VectorExtract(OperandType.I32, vec, op.Vm & 3);
+ SetIntA32(context, op.Rt, lowValue);
+
+ Operand highValue = context.VectorExtract(OperandType.I32, vec2, vm1 & 3);
+ SetIntA32(context, op.Rt2, highValue);
+ }
+ else
+ {
+ // From general purpose.
+ Operand lowValue = GetIntA32(context, op.Rt);
+ Operand resultVec = context.VectorInsert(vec, lowValue, op.Vm & 3);
+
+ Operand highValue = GetIntA32(context, op.Rt2);
+
+ if (sameOwnerVec)
+ {
+ context.Copy(vec, context.VectorInsert(resultVec, highValue, vm1 & 3));
+ }
+ else
+ {
+ context.Copy(vec, resultVec);
+ context.Copy(vec2, context.VectorInsert(vec2, highValue, vm1 & 3));
+ }
+ }
+ }
+
+ public static void Vmov_GD(ArmEmitterContext context)
+ {
+ OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp;
+
+ Operand vec = GetVecA32(op.Vm >> 1);
+ if (op.Op == 1)
+ {
+ // To general purpose.
+ Operand value = context.VectorExtract(OperandType.I64, vec, op.Vm & 1);
+ SetIntA32(context, op.Rt, context.ConvertI64ToI32(value));
+ SetIntA32(context, op.Rt2, context.ConvertI64ToI32(context.ShiftRightUI(value, Const(32))));
+ }
+ else
+ {
+ // From general purpose.
+ Operand lowValue = GetIntA32(context, op.Rt);
+ Operand highValue = GetIntA32(context, op.Rt2);
+
+ Operand value = context.BitwiseOr(
+ context.ZeroExtend32(OperandType.I64, lowValue),
+ context.ShiftLeft(context.ZeroExtend32(OperandType.I64, highValue), Const(32)));
+
+ context.Copy(vec, context.VectorInsert(vec, value, op.Vm & 1));
+ }
+ }
+
+ public static void Vmovl(ArmEmitterContext context)
+ {
+ OpCode32SimdLong op = (OpCode32SimdLong)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U);
+
+ if (op.Size == 2)
+ {
+ if (op.U)
+ {
+ me = context.ZeroExtend32(OperandType.I64, me);
+ }
+ else
+ {
+ me = context.SignExtend32(OperandType.I64, me);
+ }
+ }
+
+ res = EmitVectorInsert(context, res, me, index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vswp(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ if (op.Q)
+ {
+ Operand temp = context.Copy(GetVecA32(op.Qd));
+
+ context.Copy(GetVecA32(op.Qd), GetVecA32(op.Qm));
+ context.Copy(GetVecA32(op.Qm), temp);
+ }
+ else
+ {
+ Operand temp = ExtractScalar(context, OperandType.I64, op.Vd);
+
+ InsertScalar(context, op.Vd, ExtractScalar(context, OperandType.I64, op.Vm));
+ InsertScalar(context, op.Vm, temp);
+ }
+ }
+
+ public static void Vtbl(ArmEmitterContext context)
+ {
+ OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;
+
+ bool extension = op.Opc == 1;
+ int length = op.Length + 1;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand d = GetVecA32(op.Qd);
+ Operand m = EmitMoveDoubleWordToSide(context, GetVecA32(op.Qm), op.Vm, 0);
+
+ Operand res;
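+ // A32 table registers are doublewords, so valid indices are 0..7 (0x07 per byte rather than the 0x0F used for A64).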
+ Operand mask = X86GetAllElements(context, 0x0707070707070707L);
+
+ // Fast path for single register table.
+ {
+ Operand n = EmitMoveDoubleWordToSide(context, GetVecA32(op.Qn), op.Vn, 0);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
+ }
+
+ for (int index = 1; index < length; index++)
+ {
+ int newVn = (op.Vn + index) & 0x1F;
+ (int qn, _) = GetQuadwordAndSubindex(newVn, op.RegisterSize);
+ Operand ni = EmitMoveDoubleWordToSide(context, GetVecA32(qn), newVn, 0);
+
+ Operand idxMask = X86GetAllElements(context, 0x0808080808080808L * index);
+
+ Operand mSubMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, idxMask);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mSubMask, mask);
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, mSubMask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+ }
+
+ if (extension)
+ {
+ Operand idxMask = X86GetAllElements(context, (0x0808080808080808L * length) - 0x0101010101010101L);
+ Operand zeroMask = context.VectorZero();
+
+ Operand mPosMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, idxMask);
+ Operand mNegMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, zeroMask, m);
+
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Por, mPosMask, mNegMask);
+
+ Operand dMask = context.AddIntrinsic(Intrinsic.X86Pand, EmitMoveDoubleWordToSide(context, d, op.Vd, 0), mMask);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, dMask);
+ }
+
+ res = EmitMoveDoubleWordToSide(context, res, 0, op.Vd);
+
+ context.Copy(d, EmitDoubleWordInsert(context, d, res, op.Vd));
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+
+ (int Qx, int Ix)[] tableTuples = new (int, int)[length];
+ for (int i = 0; i < length; i++)
+ {
+ tableTuples[i] = GetQuadwordAndSubindex(op.Vn + i, op.RegisterSize);
+ }
+
+ int byteLength = length * 8;
+
+ Operand res = GetVecA32(op.Qd);
+ Operand m = GetVecA32(op.Qm);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + op.Im));
+
+ Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength));
+ Operand elemRes = default; // Note: This is I64 for ease of calculation.
+
+ // TODO: Branching rather than conditional select.
+
+ // Get indexed byte.
+ // To keep the IL simple, we read a byte from every table vector and use a nested conditional select to choose the right result.
+ // This does extract `length` times for every element, but it is far from the worst case.
+
+ // Which vector number is the index on.
+ Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3));
+ // What should we shift by to extract it.
+ Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3));
+
+ for (int i = 0; i < length; i++)
+ {
+ (int qx, int ix) = tableTuples[i];
+ // Get the whole vector, we'll get a byte out of it.
+ Operand lookupResult;
+ if (qx == op.Qd)
+ {
+ // Result contains the current state of the vector.
+ lookupResult = context.VectorExtract(OperandType.I64, res, ix);
+ }
+ else
+ {
+ lookupResult = EmitVectorExtract32(context, qx, ix, 3, false); // I64
+ }
+
+ lookupResult = context.ShiftRightUI(lookupResult, subVecIndexShift); // Get the relevant byte from this vector.
+
+ if (i == 0)
+ {
+ elemRes = lookupResult; // The first lookup initializes the result.
+ }
+ else
+ {
+ Operand isThisElem = context.ICompareEqual(vecIndex, Const(i));
+ elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes);
+ }
+ }
+
+ Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, op.Qd, index + op.Id, 0, false)) : Const(0L);
+
+ res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + op.Id, 0);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+ }
+
+ public static void Vtrn(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ EmitVectorShuffleOpSimd32(context, (m, d) =>
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks[op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ if (op.Size < 3)
+ {
+ d = context.AddIntrinsic(Intrinsic.X86Pshufb, d, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Operand resD = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);
+ Operand resM = context.AddIntrinsic(X86PunpckhInstruction[op.Size], d, m);
+
+ return (resM, resD);
+ });
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand resD = GetVecA32(op.Qd);
+ Operand resM = GetVecA32(op.Qm);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand d2 = EmitVectorExtract32(context, op.Qd, pairIndex + 1 + op.Id, op.Size, false);
+ Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, false);
+
+ resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + op.Id, op.Size);
+
+ if (overlap)
+ {
+ resM = resD;
+ }
+
+ resM = EmitVectorInsert(context, resM, d2, pairIndex + op.Im, op.Size);
+
+ if (overlap)
+ {
+ resD = resM;
+ }
+ }
+
+ context.Copy(GetVecA32(op.Qd), resD);
+ if (!overlap)
+ {
+ context.Copy(GetVecA32(op.Qm), resM);
+ }
+ }
+ }
+
+ public static void Vzip(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Zip1V, Intrinsic.Arm64Zip2V);
+ }
+ else if (Optimizations.UseSse2)
+ {
+ EmitVectorShuffleOpSimd32(context, (m, d) =>
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Operand resD = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);
+ Operand resM = context.AddIntrinsic(X86PunpckhInstruction[op.Size], d, m);
+
+ return (resM, resD);
+ }
+ else
+ {
+ Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);
+
+ Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, res, context.VectorZero());
+ Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, res, context.VectorZero());
+ return (resM, resD);
+ }
+ });
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand resD = GetVecA32(op.Qd);
+ Operand resM = GetVecA32(op.Qm);
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int pairIndex = index << 1;
+ Operand dRowD = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, false);
+ Operand mRowD = EmitVectorExtract32(context, op.Qm, index + op.Im, op.Size, false);
+
+ Operand dRowM = EmitVectorExtract32(context, op.Qd, index + op.Id + pairs, op.Size, false);
+ Operand mRowM = EmitVectorExtract32(context, op.Qm, index + op.Im + pairs, op.Size, false);
+
+ resD = EmitVectorInsert(context, resD, dRowD, pairIndex + op.Id, op.Size);
+ resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + op.Id, op.Size);
+
+ if (overlap)
+ {
+ resM = resD;
+ }
+
+ resM = EmitVectorInsert(context, resM, dRowM, pairIndex + op.Im, op.Size);
+ resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + op.Im, op.Size);
+
+ if (overlap)
+ {
+ resD = resM;
+ }
+ }
+
+ context.Copy(GetVecA32(op.Qd), resD);
+ if (!overlap)
+ {
+ context.Copy(GetVecA32(op.Qm), resM);
+ }
+ }
+ }
+
+ public static void Vuzp(ArmEmitterContext context)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Uzp1V, Intrinsic.Arm64Uzp2V);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ EmitVectorShuffleOpSimd32(context, (m, d) =>
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Operand mask = default;
+
+ if (op.Size < 3)
+ {
+ long maskE0 = EvenMasks[op.Size];
+ long maskE1 = OddMasks[op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ d = context.AddIntrinsic(Intrinsic.X86Pshufb, d, mask);
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, d, m);
+ Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, d, m);
+
+ return (resM, resD);
+ }
+ else
+ {
+ Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpcklInst, d, m);
+
+ if (op.Size < 2)
+ {
+ long maskE0 = _masksE0_Uzp[op.Size];
+ long maskE1 = _masksE1_Uzp[op.Size];
+
+ Operand mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
+ }
+
+ Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, res, context.VectorZero());
+ Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, res, context.VectorZero());
+
+ return (resM, resD);
+ }
+ });
+ }
+ else
+ {
+ int elems = op.GetBytesCount() >> op.Size;
+ int pairs = elems >> 1;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand resD = GetVecA32(op.Qd);
+ Operand resM = GetVecA32(op.Qm);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand dIns, mIns;
+ if (index >= pairs)
+ {
+ int pairIndex = index - pairs;
+ dIns = EmitVectorExtract32(context, op.Qm, (pairIndex << 1) + op.Im, op.Size, false);
+ mIns = EmitVectorExtract32(context, op.Qm, ((pairIndex << 1) | 1) + op.Im, op.Size, false);
+ }
+ else
+ {
+ dIns = EmitVectorExtract32(context, op.Qd, (index << 1) + op.Id, op.Size, false);
+ mIns = EmitVectorExtract32(context, op.Qd, ((index << 1) | 1) + op.Id, op.Size, false);
+ }
+
+ resD = EmitVectorInsert(context, resD, dIns, index + op.Id, op.Size);
+
+ if (overlap)
+ {
+ resM = resD;
+ }
+
+ resM = EmitVectorInsert(context, resM, mIns, index + op.Im, op.Size);
+
+ if (overlap)
+ {
+ resD = resM;
+ }
+ }
+
+ context.Copy(GetVecA32(op.Qd), resD);
+ if (!overlap)
+ {
+ context.Copy(GetVecA32(op.Qm), resM);
+ }
+ }
+ }
+
+ private static void EmitVectorZipUzpOpSimd32(ArmEmitterContext context, Intrinsic inst1, Intrinsic inst2)
+ {
+ OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+ bool overlap = op.Qm == op.Qd;
+
+ Operand d = GetVecA32(op.Qd);
+ Operand m = GetVecA32(op.Qm);
+
+ Operand dPart = d;
+ Operand mPart = m;
+
+ if (!op.Q) // Register swap: move the relevant doubleword to side 0 so the 64-bit intrinsic reads it.
+ {
+ dPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
+ mPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
+ }
+
+ Intrinsic vSize = op.Q ? Intrinsic.Arm64V128 : Intrinsic.Arm64V64;
+
+ vSize |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+ Operand resD = context.AddIntrinsic(inst1 | vSize, dPart, mPart);
+ Operand resM = context.AddIntrinsic(inst2 | vSize, dPart, mPart);
+
+ if (!op.Q) // Register insert.
+ {
+ resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, d, Const(op.Vd & 1), resD, Const(0));
+
+ if (overlap)
+ {
+ resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, resD, Const(op.Vm & 1), resM, Const(0));
+ }
+ else
+ {
+ resM = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, m, Const(op.Vm & 1), resM, Const(0));
+ }
+ }
+
+ context.Copy(d, resD);
+ if (!overlap)
+ {
+ context.Copy(m, resM);
+ }
+ }
+
+ private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand m = GetVecA32(op.Qm);
+ Operand d = GetVecA32(op.Qd);
+ Operand initialM = m;
+ Operand initialD = d;
+
+ if (!op.Q) // Register swap: move relevant doubleword to side 0, for consistency.
+ {
+ m = EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
+ d = EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
+ }
+
+ (Operand resM, Operand resD) = shuffleFunc(m, d);
+
+ bool overlap = op.Qm == op.Qd;
+
+ if (!op.Q) // Register insert.
+ {
+ resM = EmitDoubleWordInsert(context, initialM, EmitMoveDoubleWordToSide(context, resM, 0, op.Vm), op.Vm);
+ resD = EmitDoubleWordInsert(context, overlap ? resM : initialD, EmitMoveDoubleWordToSide(context, resD, 0, op.Vd), op.Vd);
+ }
+
+ if (!overlap)
+ {
+ context.Copy(initialM, resM);
+ }
+
+ context.Copy(initialD, resD);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdShift.cs b/src/ARMeilleure/Instructions/InstEmitSimdShift.cs
new file mode 100644
index 0000000..94e9125
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdShift.cs
@@ -0,0 +1,1935 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
+ #region "Masks"
+ private static readonly long[] _masks_SliSri = new long[] // Replication masks.
+ {
+ 0x0101010101010101L, 0x0001000100010001L, 0x0000000100000001L, 0x0000000000000001L,
+ };
+ #endregion
+
+ public static void Rshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64RshrnV, shift);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
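+ // Round to nearest by adding 1 << (shift - 1) before the logical shift; the Pshufb below then narrows each element to half width.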
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand dLow = context.VectorZeroUpper64(d);
+
+ Operand mask = default;
+
+ switch (op.Size + 1)
+ {
+ case 1:
+ mask = X86GetAllElements(context, (int)roundConst * 0x00010001);
+ break;
+ case 2:
+ mask = X86GetAllElements(context, (int)roundConst);
+ break;
+ case 3:
+ mask = X86GetAllElements(context, roundConst);
+ break;
+ }
+
+ Intrinsic addInst = X86PaddInstruction[op.Size + 1];
+
+ Operand res = context.AddIntrinsic(addInst, n, mask);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(srlInst, res, Const(shift));
+
+ Operand mask2 = X86GetAllElements(context, EvenMasks[op.Size]);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, dLow, res);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: true);
+ }
+ }
+
+ public static void Shl_S(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64ShlS, shift);
+ }
+ else
+ {
+ EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+ }
+ }
+
+ public static void Shl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+ int eSize = 8 << op.Size;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64ShlV, shift);
+ }
+ else if (shift >= eSize)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand n = GetVec(op.Rn);
+
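+ // SSE has no byte-granular shift, so use Gf2p8affineqb with a bit matrix that performs the shift on each byte.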
+ ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+ }
+ }
+
+ public static void Shll_V(ArmEmitterContext context)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int shift = 8 << op.Size;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ShllV);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
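+ // SHLL2 reads the upper half of the source, so shift it down first; Pmovsx widens, then the left shift by the element size finishes the job.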
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ Intrinsic movsxInst = X86PmovsxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movsxInst, n);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ public static void Shrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64ShrnV, shift);
+ }
+ else if (Optimizations.UseSsse3)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand dLow = context.VectorZeroUpper64(d);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
+
+ Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Operand mask = X86GetAllElements(context, EvenMasks[op.Size]);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask);
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+
+ res = context.AddIntrinsic(movInst, dLow, res);
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: false);
+ }
+ }
+
+ public static void Sli_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SliS, shift);
+ }
+ else
+ {
+ EmitSli(context, scalar: true);
+ }
+ }
+
+ public static void Sli_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SliV, shift);
+ }
+ else
+ {
+ EmitSli(context, scalar: false);
+ }
+ }
+
+ public static void Sqrshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Sqrshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnS, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+ }
+ }
+
+ public static void Sqrshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnV, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+ }
+
+ public static void Sqrshrun_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunS, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+ }
+ }
+
+ public static void Sqrshrun_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunV, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+ }
+
+ public static void Sqshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Sqshl_Si(ArmEmitterContext context)
+ {
+ EmitShlImmOp(context, signedDst: true, ShlRegFlags.Signed | ShlRegFlags.Scalar | ShlRegFlags.Saturating);
+ }
+
+ public static void Sqshl_Vi(ArmEmitterContext context)
+ {
+ EmitShlImmOp(context, signedDst: true, ShlRegFlags.Signed | ShlRegFlags.Saturating);
+ }
+
+ public static void Sqshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnS, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+ }
+ }
+
+ public static void Sqshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnV, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+ }
+
+ public static void Sqshrun_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunS, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+ }
+ }
+
+ public static void Sqshrun_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunV, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+ }
+
+ public static void Sri_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SriS, shift);
+ }
+ else
+ {
+ EmitSri(context, scalar: true);
+ }
+ }
+
+ public static void Sri_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SriV, shift);
+ }
+ else
+ {
+ EmitSri(context, scalar: false);
+ }
+ }
+
+ public static void Srshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
+ }
+ }
+
+ public static void Srshr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SrshrS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Srshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SrshrV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand n = GetVec(op.Rn);
+
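+ // (n << (eSize - shift)) >> (eSize - 1) isolates the last bit shifted out; adding it to the arithmetic shift rounds to nearest.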
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSra);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Srsra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SrsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Srsra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SrsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
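+ // Same shift-out-the-rounding-bit trick as Srshr_V, with the accumulate onto the destination at the end.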
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSra);
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Sshl_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SshlS);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
+ }
+ }
+
+ public static void Sshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Signed);
+ }
+ }
+
+ public static void Sshll_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshllV, shift);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ Intrinsic movsxInst = X86PmovsxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movsxInst, n);
+
+ if (shift != 0)
+ {
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinarySx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ public static void Sshr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SshrS, shift);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+ }
+ }
+
+ public static void Sshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshrV, shift);
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand n = GetVec(op.Rn);
+
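+ // GFNI arithmetic byte shift: start from the logical-shift bit matrix and set the rows that must replicate the sign bit.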
+ ulong bitMatrix;
+
+ if (shift < 8)
+ {
+ bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);
+
+ // Extend sign-bit
+ bitMatrix |= 0x8080808080808080UL >> (64 - shift * 8);
+ }
+ else
+ {
+ // Replicate sign-bit into all bits
+ bitMatrix = 0x8080808080808080UL;
+ }
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorSx);
+ }
+ }
+
+ public static void Ssra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Ssra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+ {
+ int shift = GetImmShr(op);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sraInst = X86PsraInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sraInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Uqrshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Uqrshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnS, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+ }
+ }
+
+ public static void Uqrshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnV, shift);
+ }
+ else
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+ }
+ }
+
+ public static void Uqshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Saturating);
+ }
+ }
+
+ public static void Uqshrn_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnS, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+ }
+ }
+
+ public static void Uqshrn_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnV, shift);
+ }
+ else
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+ }
+ }
+
+ public static void Urshl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Round);
+ }
+ }
+
+ public static void Urshr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UrshrS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Urshr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UrshrV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand n = GetVec(op.Rn);
+
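+ // Rounding adds bit (shift - 1) of each element: shift it up to the MSB, then logically back down to bit 0.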
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSrl);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
+ }
+ }
+
+ public static void Ursra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UrsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Ursra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UrsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
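+ // Same rounding-bit extraction as Urshr_V, with the accumulate against d at the end.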
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));
+
+ Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, nSrl);
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Ushl_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64UshlS);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.Scalar);
+ }
+ }
+
+ public static void Ushl_V(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UshlV);
+ }
+ else
+ {
+ EmitShlRegOp(context, ShlRegFlags.None);
+ }
+ }
+
+ public static void Ushll_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseAdvSimd)
+ {
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshllV, shift);
+ }
+ else if (Optimizations.UseSse41)
+ {
+ Operand n = GetVec(op.Rn);
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+ }
+
+ Intrinsic movzxInst = X86PmovzxInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(movzxInst, n);
+
+ if (shift != 0)
+ {
+ Intrinsic sllInst = X86PsllInstruction[op.Size + 1];
+
+ res = context.AddIntrinsic(sllInst, res, Const(shift));
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
+ }
+ }
+
+ public static void Ushr_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UshrS, shift);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+ }
+ }
+
+ public static void Ushr_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorZx);
+ }
+ }
+
+ public static void Usra_S(ArmEmitterContext context)
+ {
+ if (Optimizations.UseAdvSimd)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UsraS, shift);
+ }
+ else
+ {
+ EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ public static void Usra_V(ArmEmitterContext context)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ if (Optimizations.UseAdvSimd)
+ {
+ int shift = GetImmShr(op);
+
+ InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UsraV, shift);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ int shift = GetImmShr(op);
+
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Intrinsic addInst = X86PaddInstruction[op.Size];
+
+ res = context.AddIntrinsic(addInst, res, d);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
+ }
+ }
+
+ [Flags]
+ private enum ShrImmFlags
+ {
+ Scalar = 1 << 0,
+ Signed = 1 << 1,
+
+ Round = 1 << 2,
+ Accumulate = 1 << 3,
+
+ ScalarSx = Scalar | Signed,
+ ScalarZx = Scalar,
+
+ VectorSx = Signed,
+ VectorZx = 0,
+ }
+
+ private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
+ }
+
+ private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
+ }
+
+ private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
+ }
+
+ private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
+ }
+
+ private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ bool scalar = (flags & ShrImmFlags.Scalar) != 0;
+ bool signed = (flags & ShrImmFlags.Signed) != 0;
+ bool round = (flags & ShrImmFlags.Round) != 0;
+ bool accumulate = (flags & ShrImmFlags.Accumulate) != 0;
+
+ int shift = GetImmShr(op);
+
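+ // Adding 1 << (shift - 1) before shifting implements round-to-nearest (ties toward positive infinity).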
+ long roundConst = 1L << (shift - 1);
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+ if (op.Size <= 2)
+ {
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = signed ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
+ }
+ else /* if (op.Size == 3) */
+ {
+ e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift);
+ }
+
+ if (accumulate)
+ {
+ Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed);
+
+ e = context.Add(e, de);
+ }
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
+ int elems = 8 >> op.Size;
+
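+ // The '2' (upper-half) narrowing variants write their results to the top half of Rd and preserve the bottom half.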
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = context.ShiftRightUI(e, Const(shift));
+
+ res = EmitVectorInsert(context, res, e, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+
+ [Flags]
+ private enum ShrImmSaturatingNarrowFlags
+ {
+ Scalar = 1 << 0,
+ SignedSrc = 1 << 1,
+ SignedDst = 1 << 2,
+
+ Round = 1 << 3,
+
+ ScalarSxSx = Scalar | SignedSrc | SignedDst,
+ ScalarSxZx = Scalar | SignedSrc,
+ ScalarZxZx = Scalar,
+
+ VectorSxSx = SignedSrc | SignedDst,
+ VectorSxZx = SignedSrc,
+ VectorZxZx = 0,
+ }
+
+ private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
+ }
+
+ private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
+ bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
+ bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
+ bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;
+
+ int shift = GetImmShr(op);
+
+ long roundConst = 1L << (shift - 1);
+
+ int elems = !scalar ? 8 >> op.Size : 1;
+
+ int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
+
+ Operand d = GetVec(op.Rd);
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);
+
+ if (op.Size <= 1 || !round)
+ {
+ if (round)
+ {
+ e = context.Add(e, Const(roundConst));
+ }
+
+ e = signedSrc ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
+ }
+ else /* if (op.Size == 2 && round) */
+ {
+ e = EmitShrImm64(context, e, signedSrc, roundConst, shift); // shift <= 32
+ }
+
+ e = signedSrc ? EmitSignedSrcSatQ(context, e, op.Size, signedDst) : EmitUnsignedSrcSatQ(context, e, op.Size, signedDst);
+
+ res = EmitVectorInsert(context, res, e, part + index, op.Size);
+ }
+
+ context.Copy(d, res);
+ }
+
+ // dst64 = (Int(src64, signed) + roundConst) >> shift;
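+ // The add of roundConst can carry out of 64 bits, so this defers to a SoftFallback helper that handles the carry.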
+ private static Operand EmitShrImm64(
+ ArmEmitterContext context,
+ Operand value,
+ bool signed,
+ long roundConst,
+ int shift)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64));
+
+ return context.Call(info, value, Const(roundConst), Const(shift));
+ }
+
+ private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm)
+ {
+ EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true);
+ }
+
+ private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm)
+ {
+ EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false);
+ }
+
+ private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, emit(ne, Const(imm)), index, op.Size + 1);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitSli(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShl(op);
+ int eSize = 8 << op.Size;
+
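+ // SLI keeps the low 'shift' bits of each destination element; the mask selects those bits.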
+ ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL;
+
+ if (shift >= eSize)
+ {
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic sllInst = X86PsllInstruction[op.Size];
+
+ Operand nShifted = context.AddIntrinsic(sllInst, n, Const(shift));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand neShifted = context.ShiftLeft(ne, Const(shift));
+
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+
+ Operand deMasked = context.BitwiseAnd(de, Const(mask));
+
+ Operand e = context.BitwiseOr(neShifted, deMasked);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ private static void EmitSri(ArmEmitterContext context, bool scalar)
+ {
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+ int eSize = 8 << op.Size;
+
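+ // SRI keeps the top 'shift' bits of each destination element; the second term clamps the mask to the element width.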
+ ulong mask = (ulong.MaxValue << (eSize - shift)) & (ulong.MaxValue >> (64 - eSize));
+
+ if (shift >= eSize)
+ {
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else if (Optimizations.UseGfni && op.Size == 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);
+
+ Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);
+
+ Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else if (Optimizations.UseSse2 && op.Size > 0)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic srlInst = X86PsrlInstruction[op.Size];
+
+ Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));
+
+ Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);
+
+ Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);
+
+ if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(d, res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+ Operand neShifted = shift != 64 ? context.ShiftRightUI(ne, Const(shift)) : Const(0UL);
+
+ Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
+
+ Operand deMasked = context.BitwiseAnd(de, Const(mask));
+
+ Operand e = context.BitwiseOr(neShifted, deMasked);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ [Flags]
+ private enum ShlRegFlags
+ {
+ None = 0,
+ Scalar = 1 << 0,
+ Signed = 1 << 1,
+ Round = 1 << 2,
+ Saturating = 1 << 3,
+ }
+
+ private static void EmitShlImmOp(ArmEmitterContext context, bool signedDst, ShlRegFlags flags = ShlRegFlags.None)
+ {
+ bool scalar = flags.HasFlag(ShlRegFlags.Scalar);
+ bool signed = flags.HasFlag(ShlRegFlags.Signed);
+ bool saturating = flags.HasFlag(ShlRegFlags.Saturating);
+
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+
+ Operand e = !saturating
+ ? EmitShlImm(context, ne, GetImmShl(op), op.Size)
+ : EmitShlImmSatQ(context, ne, GetImmShl(op), op.Size, signed, signedDst);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static Operand EmitShlImm(ArmEmitterContext context, Operand op, int shiftLsB, int size)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand res = context.AllocateLocal(OperandType.I64);
+
+ if (shiftLsB >= eSize)
+ {
+ // Shifting by the element size or more moves every bit out, so the result is zero.
+ Operand zeroL = Const(0L);
+ context.Copy(res, zeroL);
+ }
+ else
+ {
+ Operand shl = context.ShiftLeft(op, Const(shiftLsB));
+ context.Copy(res, shl);
+ }
+
+ return res;
+ }
+
+ private static Operand EmitShlImmSatQ(ArmEmitterContext context, Operand op, int shiftLsB, int size, bool signedSrc, bool signedDst)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lblEnd = Label();
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
+ if (shiftLsB >= eSize)
+ {
+ context.Copy(res, signedSrc
+ ? EmitSignedSignSatQ(context, op, size)
+ : EmitUnsignedSignSatQ(context, op, size));
+ }
+ else
+ {
+ Operand shl = context.ShiftLeft(op, Const(shiftLsB));
+ if (eSize == 64)
+ {
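+ // Shift back and compare with the original: if the round trip differs, the left shift overflowed and must saturate.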
+ Operand sarOrShr = signedSrc
+ ? context.ShiftRightSI(shl, Const(shiftLsB))
+ : context.ShiftRightUI(shl, Const(shiftLsB));
+ context.Copy(res, shl);
+ context.BranchIf(lblEnd, sarOrShr, op, Comparison.Equal);
+ context.Copy(res, signedSrc
+ ? EmitSignedSignSatQ(context, op, size)
+ : EmitUnsignedSignSatQ(context, op, size));
+ }
+ else
+ {
+ context.Copy(res, signedSrc
+ ? EmitSignedSrcSatQ(context, shl, size, signedDst)
+ : EmitUnsignedSrcSatQ(context, shl, size, signedDst));
+ }
+ }
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ private static void EmitShlRegOp(ArmEmitterContext context, ShlRegFlags flags = ShlRegFlags.None)
+ {
+ bool scalar = flags.HasFlag(ShlRegFlags.Scalar);
+ bool signed = flags.HasFlag(ShlRegFlags.Signed);
+ bool round = flags.HasFlag(ShlRegFlags.Round);
+ bool saturating = flags.HasFlag(ShlRegFlags.Saturating);
+
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, size: 0);
+
+ Operand e = !saturating
+ ? EmitShlReg(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed)
+ : EmitShlRegSatQ(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // long SignedShlReg(long op, int shiftLsB, bool round, int size);
+ // ulong UnsignedShlReg(ulong op, int shiftLsB, bool round, int size);
+ private static Operand EmitShlReg(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(shiftLsB.Type == OperandType.I32);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lbl1 = Label();
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zero = Const(0);
+ Operand zeroL = Const(0L);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
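+ // Negative amounts shift right, zero leaves the value unchanged, and amounts >= eSize produce zero.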
+ context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual);
+ context.Copy(res, signed
+ ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)
+ : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual);
+ Operand shl = context.ShiftLeft(op, shiftLsB);
+ Operand isGreaterOrEqual = context.ICompareGreaterOrEqual(shiftLsB, eSizeOp);
+ context.Copy(res, context.ConditionalSelect(isGreaterOrEqual, zeroL, shl));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // long SignedShlRegSatQ(long op, int shiftLsB, bool round, int size);
+ // ulong UnsignedShlRegSatQ(ulong op, int shiftLsB, bool round, int size);
+ private static Operand EmitShlRegSatQ(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed)
+ {
+ int eSize = 8 << size;
+
+ Debug.Assert(op.Type == OperandType.I64);
+ Debug.Assert(shiftLsB.Type == OperandType.I32);
+ Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
+
+ Operand lbl1 = Label();
+ Operand lbl2 = Label();
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zero = Const(0);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
+
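+ // shift < 0: shift right; shift == 0: unchanged; shift >= eSize: saturate; otherwise shift left and saturate on overflow.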
+ context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual);
+ context.Copy(res, signed
+ ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)
+ : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl1);
+ context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual);
+ context.BranchIf(lbl2, shiftLsB, eSizeOp, Comparison.Less);
+ context.Copy(res, signed
+ ? EmitSignedSignSatQ(context, op, size)
+ : EmitUnsignedSignSatQ(context, op, size));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lbl2);
+ Operand shl = context.ShiftLeft(op, shiftLsB);
+ if (eSize == 64)
+ {
+ Operand sarOrShr = signed
+ ? context.ShiftRightSI(shl, shiftLsB)
+ : context.ShiftRightUI(shl, shiftLsB);
+ context.Copy(res, shl);
+ context.BranchIf(lblEnd, sarOrShr, op, Comparison.Equal);
+ context.Copy(res, signed
+ ? EmitSignedSignSatQ(context, op, size)
+ : EmitUnsignedSignSatQ(context, op, size));
+ }
+ else
+ {
+ context.Copy(res, signed
+ ? EmitSignedSrcSatQ(context, shl, size, signedDst: true)
+ : EmitUnsignedSrcSatQ(context, shl, size, signedDst: false));
+ }
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+
+ // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+ // long SignedShrReg(long op, int shift, bool round, int eSize);
+ private static Operand EmitSignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize)
+ {
+ if (round)
+ {
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zeroL = Const(0L);
+ Operand one = Const(1);
+ Operand oneL = Const(1L);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL);
+
+ context.BranchIf(lblEnd, shift, eSizeOp, Comparison.GreaterOrEqual);
+ Operand roundConst = context.ShiftLeft(oneL, context.Subtract(shift, one));
+ Operand add = context.Add(op, roundConst);
+ Operand sar = context.ShiftRightSI(add, shift);
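+ // If the add overflowed into the sign bit, the true sum still fits in 64 unsigned bits,
+ // so the logical shift of the wrapped value is selected instead.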
+ if (eSize == 64)
+ {
+ Operand shr = context.ShiftRightUI(add, shift);
+ Operand left = context.BitwiseAnd(context.Negate(op), context.BitwiseExclusiveOr(op, add));
+ Operand isLess = context.ICompareLess(left, zeroL);
+ context.Copy(res, context.ConditionalSelect(isLess, shr, sar));
+ }
+ else
+ {
+ context.Copy(res, sar);
+ }
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ else
+ {
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zeroL = Const(0L);
+ Operand negOneL = Const(-1L);
+
+ Operand sar = context.ShiftRightSI(op, shift);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sar);
+
+ context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less);
+ Operand isLess = context.ICompareLess(op, zeroL);
+ context.Copy(res, context.ConditionalSelect(isLess, negOneL, zeroL));
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ }
+
+ // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+ // ulong UnsignedShrReg(ulong op, int shift, bool round, int eSize);
+ private static Operand EmitUnsignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize)
+ {
+ if (round)
+ {
+ Operand lblEnd = Label();
+
+ Operand zeroUL = Const(0UL);
+ Operand one = Const(1);
+ Operand oneUL = Const(1UL);
+ Operand eSizeMaxOp = Const(64);
+ Operand oneShl63UL = Const(1UL << 63);
+
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL);
+
+ context.BranchIf(lblEnd, shift, eSizeMaxOp, Comparison.Greater);
+ Operand roundConst = context.ShiftLeft(oneUL, context.Subtract(shift, one));
+ Operand add = context.Add(op, roundConst);
+ Operand shr = context.ShiftRightUI(add, shift);
+ Operand isEqual = context.ICompareEqual(shift, eSizeMaxOp);
+ context.Copy(res, context.ConditionalSelect(isEqual, zeroUL, shr));
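+ // If the add wrapped past 2^64, OR the lost carry (2^(64 - shift)) back into the shifted result.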
+ if (eSize == 64)
+ {
+ context.BranchIf(lblEnd, add, op, Comparison.GreaterOrEqualUI);
+ Operand right = context.BitwiseOr(shr, context.ShiftRightUI(oneShl63UL, context.Subtract(shift, one)));
+ context.Copy(res, context.ConditionalSelect(isEqual, oneUL, right));
+ }
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ else
+ {
+ Operand lblEnd = Label();
+
+ Operand eSizeOp = Const(eSize);
+ Operand zeroUL = Const(0UL);
+
+ Operand shr = context.ShiftRightUI(op, shift);
+ Operand res = context.Copy(context.AllocateLocal(OperandType.I64), shr);
+
+ context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less);
+ context.Copy(res, zeroUL);
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblEnd);
+
+ return res;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs
new file mode 100644
index 0000000..eb28a0c
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs
@@ -0,0 +1,450 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Reflection;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Vqrshrn(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ EmitRoundShrImmSaturatingNarrowOp(context, op.U ? ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+
+ public static void Vqrshrun(ArmEmitterContext context)
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+
+ public static void Vqshrn(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ EmitShrImmSaturatingNarrowOp(context, op.U ? ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+
+ public static void Vqshrun(ArmEmitterContext context)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+
+ public static void Vrshr(ArmEmitterContext context)
+ {
+ EmitRoundShrImmOp(context, accumulate: false);
+ }
+
+ public static void Vrshrn(ArmEmitterContext context)
+ {
+ EmitRoundShrImmNarrowOp(context, signed: false);
+ }
+
+ public static void Vrsra(ArmEmitterContext context)
+ {
+ EmitRoundShrImmOp(context, accumulate: true);
+ }
+
+ public static void Vshl(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op.Shift)));
+ }
+
+ public static void Vshl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U)
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, true));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, false));
+ }
+ }
+
+ public static void Vshll(ArmEmitterContext context)
+ {
+ OpCode32SimdShImmLong op = (OpCode32SimdShImmLong)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U);
+
+ if (op.Size == 2)
+ {
+ if (op.U)
+ {
+ me = context.ZeroExtend32(OperandType.I64, me);
+ }
+ else
+ {
+ me = context.SignExtend32(OperandType.I64, me);
+ }
+ }
+
+ me = context.ShiftLeft(me, Const(op.Shift));
+
+ res = EmitVectorInsert(context, res, me, index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vshll2(ArmEmitterContext context)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U);
+
+ if (op.Size == 2)
+ {
+ if (op.U)
+ {
+ me = context.ZeroExtend32(OperandType.I64, me);
+ }
+ else
+ {
+ me = context.SignExtend32(OperandType.I64, me);
+ }
+ }
+
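+ // This form always shifts by the full source element width.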
+ me = context.ShiftLeft(me, Const(8 << op.Size));
+
+ res = EmitVectorInsert(context, res, me, index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void Vshr(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+ int maxShift = (8 << op.Size) - 1;
+
+ if (op.U)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => (shift > maxShift) ? Const(op1.Type, 0) : context.ShiftRightUI(op1, Const(shift)));
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => context.ShiftRightSI(op1, Const(Math.Min(maxShift, shift))));
+ }
+ }
+
+ public static void Vshrn(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+
+ EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift)));
+ }
+
+ public static void Vsli_I(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = op.Shift;
+ int eSize = 8 << op.Size;
+
+ ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL;
+
+ Operand res = GetVec(op.Qd);
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand me = EmitVectorExtractZx(context, op.Qm, op.Im + index, op.Size);
+
+ Operand neShifted = context.ShiftLeft(me, Const(shift));
+
+ Operand de = EmitVectorExtractZx(context, op.Qd, op.Id + index, op.Size);
+
+ Operand deMasked = context.BitwiseAnd(de, Const(mask));
+
+ Operand e = context.BitwiseOr(neShifted, deMasked);
+
+ res = EmitVectorInsert(context, res, e, op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Qd), res);
+ }
+
+ public static void Vsra(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+ int maxShift = (8 << op.Size) - 1;
+
+ if (op.U)
+ {
+ EmitVectorImmBinaryQdQmOpZx32(context, (op1, op2) =>
+ {
+ Operand shiftRes = shift > maxShift ? Const(op2.Type, 0) : context.ShiftRightUI(op2, Const(shift));
+
+ return context.Add(op1, shiftRes);
+ });
+ }
+ else
+ {
+ EmitVectorImmBinaryQdQmOpSx32(context, (op1, op2) => context.Add(op1, context.ShiftRightSI(op2, Const(Math.Min(maxShift, shift)))));
+ }
+ }
+
+ public static void EmitRoundShrImmOp(ArmEmitterContext context, bool accumulate)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+ long roundConst = 1L << (shift - 1);
+
+ if (op.U)
+ {
+ if (op.Size < 2)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) =>
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ShiftRightUI(op1, Const(shift));
+ }, accumulate);
+ }
+ else if (op.Size == 2)
+ {
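+ // Widen to 64 bits first so the round constant cannot overflow a 32-bit element.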
+ EmitVectorUnaryOpZx32(context, (op1) =>
+ {
+ op1 = context.ZeroExtend32(OperandType.I64, op1);
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ConvertI64ToI32(context.ShiftRightUI(op1, Const(shift)));
+ }, accumulate);
+ }
+ else /* if (op.Size == 3) */
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: false, roundConst, shift), accumulate);
+ }
+ }
+ else
+ {
+ if (op.Size < 2)
+ {
+ EmitVectorUnaryOpSx32(context, (op1) =>
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ShiftRightSI(op1, Const(shift));
+ }, accumulate);
+ }
+ else if (op.Size == 2)
+ {
+ EmitVectorUnaryOpSx32(context, (op1) =>
+ {
+ op1 = context.SignExtend32(OperandType.I64, op1);
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ConvertI64ToI32(context.ShiftRightSI(op1, Const(shift)));
+ }, accumulate);
+ }
+ else /* if (op.Size == 3) */
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: true, roundConst, shift), accumulate);
+ }
+ }
+ }
+
+ private static void EmitRoundShrImmNarrowOp(ArmEmitterContext context, bool signed)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ int shift = GetImmShr(op);
+ long roundConst = 1L << (shift - 1);
+
+ EmitVectorUnaryNarrowOp32(context, (op1) =>
+ {
+ if (op.Size <= 1)
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+ op1 = signed ? context.ShiftRightSI(op1, Const(shift)) : context.ShiftRightUI(op1, Const(shift));
+ }
+ else /* if (op.Size == 2 && round) */
+ {
+ op1 = EmitShrImm64(context, op1, signed, roundConst, shift); // shift <= 32
+ }
+
+ return op1;
+ }, signed);
+ }
+
+ private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool unsigned)
+ {
+ if (shiftLsB.Type == OperandType.I64)
+ {
+ shiftLsB = context.ConvertI64ToI32(shiftLsB);
+ }
+
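+ // Only the bottom byte of the shift operand is significant; it is sign-extended so negative amounts select a right shift.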
+ shiftLsB = context.SignExtend8(OperandType.I32, shiftLsB);
+ Debug.Assert((uint)size < 4u);
+
+ Operand negShiftLsB = context.Negate(shiftLsB);
+
+ Operand isPositive = context.ICompareGreaterOrEqual(shiftLsB, Const(0));
+
+ Operand shl = context.ShiftLeft(op, shiftLsB);
+ Operand shr = unsigned ? context.ShiftRightUI(op, negShiftLsB) : context.ShiftRightSI(op, negShiftLsB);
+
+ Operand res = context.ConditionalSelect(isPositive, shl, shr);
+
+ if (unsigned)
+ {
+ Operand isOutOfRange = context.BitwiseOr(
+ context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)),
+ context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size)));
+
+ return context.ConditionalSelect(isOutOfRange, Const(op.Type, 0), res);
+ }
+ else
+ {
+ Operand isOutOfRange0 = context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size));
+ Operand isOutOfRangeN = context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size));
+
+ // Also zero if shift is too negative, but value was positive.
+ isOutOfRange0 = context.BitwiseOr(isOutOfRange0, context.BitwiseAnd(isOutOfRangeN, context.ICompareGreaterOrEqual(op, Const(op.Type, 0))));
+
+ Operand min = (op.Type == OperandType.I64) ? Const(-1L) : Const(-1);
+
+ return context.ConditionalSelect(isOutOfRange0, Const(op.Type, 0), context.ConditionalSelect(isOutOfRangeN, min, res));
+ }
+ }
+
+ [Flags]
+ private enum ShrImmSaturatingNarrowFlags
+ {
+ Scalar = 1 << 0,
+ SignedSrc = 1 << 1,
+ SignedDst = 1 << 2,
+
+ Round = 1 << 3,
+
+ ScalarSxSx = Scalar | SignedSrc | SignedDst,
+ ScalarSxZx = Scalar | SignedSrc,
+ ScalarZxZx = Scalar,
+
+ VectorSxSx = SignedSrc | SignedDst,
+ VectorSxZx = SignedSrc,
+ VectorZxZx = 0,
+ }
+
+ private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
+ }
+
+ private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
+ bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
+ bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
+ bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;
+
+ if (scalar)
+ {
+ // TODO: Support scalar operation.
+ throw new NotImplementedException();
+ }
+
+ int shift = GetImmShr(op);
+ long roundConst = 1L << (shift - 1);
+
+ EmitVectorUnaryNarrowOp32(context, (op1) =>
+ {
+ if (op.Size <= 1 || !round)
+ {
+ if (round)
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+ }
+
+ op1 = signedSrc ? context.ShiftRightSI(op1, Const(shift)) : context.ShiftRightUI(op1, Const(shift));
+ }
+ else /* if (op.Size == 2 && round) */
+ {
+ op1 = EmitShrImm64(context, op1, signedSrc, roundConst, shift); // shift <= 32
+ }
+
+ return EmitSatQ(context, op1, 8 << op.Size, signedSrc, signedDst);
+ }, signedSrc);
+ }
+
+ private static int GetImmShr(OpCode32SimdShImm op)
+ {
+ return (8 << op.Size) - op.Shift; // Shr amount is flipped.
+ }
+
+ // dst64 = (Int(src64, signed) + roundConst) >> shift;
+ private static Operand EmitShrImm64(
+ ArmEmitterContext context,
+ Operand value,
+ bool signed,
+ long roundConst,
+ int shift)
+ {
+ MethodInfo info = signed
+ ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64))
+ : typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64));
+
+ return context.Call(info, value, Const(roundConst), Const(shift));
+ }
+
+ private static Operand EmitSatQ(ArmEmitterContext context, Operand value, int eSize, bool signedSrc, bool signedDst)
+ {
+ Debug.Assert(eSize <= 32);
+
+ long intMin = signedDst ? -(1L << (eSize - 1)) : 0;
+ long intMax = signedDst ? (1L << (eSize - 1)) - 1 : (1L << eSize) - 1;
+
+ Operand gt = signedSrc
+ ? context.ICompareGreater(value, Const(value.Type, intMax))
+ : context.ICompareGreaterUI(value, Const(value.Type, intMax));
+
+ Operand lt = signedSrc
+ ? context.ICompareLess(value, Const(value.Type, intMin))
+ : context.ICompareLessUI(value, Const(value.Type, intMin));
+
+ value = context.ConditionalSelect(gt, Const(value.Type, intMax), value);
+ value = context.ConditionalSelect(lt, Const(value.Type, intMin), value);
+
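+ // Record sticky saturation in FPSCR.QC when either bound was applied.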
+ Operand lblNoSat = Label();
+
+ context.BranchIfFalse(lblNoSat, context.BitwiseOr(gt, lt));
+
+ SetFpFlag(context, FPState.QcFlag, Const(1));
+
+ context.MarkLabel(lblNoSat);
+
+ return value;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSystem.cs b/src/ARMeilleure/Instructions/InstEmitSystem.cs
new file mode 100644
index 0000000..8c430fc
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSystem.cs
@@ -0,0 +1,278 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ private const int DczSizeLog2 = 4; // Log2 size in words
+ public const int DczSizeInBytes = 4 << DczSizeLog2;
+
+ public static void Isb(ArmEmitterContext context)
+ {
+ // Execute as no-op.
+ }
+
+ public static void Mrs(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ MethodInfo info;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0000_0000_001:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCtrEl0));
+ break;
+ case 0b11_011_0000_0000_111:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetDczidEl0));
+ break;
+ case 0b11_011_0100_0010_000:
+ EmitGetNzcv(context);
+ return;
+ case 0b11_011_0100_0100_000:
+ EmitGetFpcr(context);
+ return;
+ case 0b11_011_0100_0100_001:
+ EmitGetFpsr(context);
+ return;
+ case 0b11_011_1101_0000_010:
+ EmitGetTpidrEl0(context);
+ return;
+ case 0b11_011_1101_0000_011:
+ EmitGetTpidrroEl0(context);
+ return;
+ case 0b11_011_1110_0000_000:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntfrqEl0));
+ break;
+ case 0b11_011_1110_0000_001:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0));
+ break;
+ case 0b11_011_1110_0000_010:
+ info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntvctEl0));
+ break;
+
+ default:
+ throw new NotImplementedException($"Unknown MRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+ }
+
+ SetIntOrZR(context, op.Rt, context.Call(info));
+ }
+
+ public static void Msr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0100_0010_000:
+ EmitSetNzcv(context);
+ return;
+ case 0b11_011_0100_0100_000:
+ EmitSetFpcr(context);
+ return;
+ case 0b11_011_0100_0100_001:
+ EmitSetFpsr(context);
+ return;
+ case 0b11_011_1101_0000_010:
+ EmitSetTpidrEl0(context);
+ return;
+
+ default:
+ throw new NotImplementedException($"Unknown MSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+ }
+ }
+
+ public static void Nop(ArmEmitterContext context)
+ {
+ // Do nothing.
+ }
+
+ public static void Sys(ArmEmitterContext context)
+ {
+ // This instruction covers CPU maintenance operations such as cache invalidation
+ // and address translation.
+ // Most variants are treated as no-ops here, since no cache is being emulated;
+ // DC ZVA still has an architectural effect (zeroing memory) and is implemented below.
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ switch (GetPackedId(op))
+ {
+ case 0b11_011_0111_0100_001:
+ {
+ // DC ZVA
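+ // Zero DczSizeInBytes of memory at the address in Rt by storing the zero register in 8-byte chunks.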
+ Operand t = GetIntOrZR(context, op.Rt);
+
+ for (long offset = 0; offset < DczSizeInBytes; offset += 8)
+ {
+ Operand address = context.Add(t, Const(offset));
+
+ InstEmitMemoryHelper.EmitStore(context, address, RegisterConsts.ZeroIndex, 3);
+ }
+
+ break;
+ }
+
+ // No-op
+ case 0b11_011_0111_1110_001: // DC CIVAC
+ break;
+
+ case 0b11_011_0111_0101_001: // IC IVAU
+ Operand target = Register(op.Rt, RegisterType.Integer, OperandType.I64);
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.InvalidateCacheLine)), target);
+ break;
+ }
+ }
+
+ private static int GetPackedId(OpCodeSystem op)
+ {
+ int id;
+
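+ // Layout: op0[15:14] | op1[13:11] | CRn[10:7] | CRm[6:3] | op2[2:0].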
+ id = op.Op2 << 0;
+ id |= op.CRm << 3;
+ id |= op.CRn << 7;
+ id |= op.Op1 << 11;
+ id |= op.Op0 << 14;
+
+ return id;
+ }
+
+ private static void EmitGetNzcv(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand nzcv = context.ShiftLeft(GetFlag(PState.VFlag), Const((int)PState.VFlag));
+ nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.CFlag), Const((int)PState.CFlag)));
+ nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.ZFlag), Const((int)PState.ZFlag)));
+ nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.NFlag), Const((int)PState.NFlag)));
+
+ SetIntOrZR(context, op.Rt, nzcv);
+ }
+
+ private static void EmitGetFpcr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand fpcr = Const(0);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPCR.Mask.HasFlag((FPCR)(1u << flag)))
+ {
+ fpcr = context.BitwiseOr(fpcr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag)));
+ }
+ }
+
+ SetIntOrZR(context, op.Rt, fpcr);
+ }
+
+ private static void EmitGetFpsr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ context.SyncQcFlag();
+
+ Operand fpsr = Const(0);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSR.Mask.HasFlag((FPSR)(1u << flag)))
+ {
+ fpsr = context.BitwiseOr(fpsr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag)));
+ }
+ }
+
+ SetIntOrZR(context, op.Rt, fpsr);
+ }
+
+ private static void EmitGetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ Operand result = context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())));
+
+ SetIntOrZR(context, op.Rt, result);
+ }
+
+ private static void EmitGetTpidrroEl0(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ Operand result = context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrroEl0Offset())));
+
+ SetIntOrZR(context, op.Rt, result);
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand nzcv = GetIntOrZR(context, op.Rt);
+ nzcv = context.ConvertI64ToI32(nzcv);
+
+ SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.VFlag)), Const(1)));
+ SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.CFlag)), Const(1)));
+ SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.ZFlag)), Const(1)));
+ SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.NFlag)), Const(1)));
+ }
+
+ private static void EmitSetFpcr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand fpcr = GetIntOrZR(context, op.Rt);
+ fpcr = context.ConvertI64ToI32(fpcr);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPCR.Mask.HasFlag((FPCR)(1u << flag)))
+ {
+ SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpcr, Const(flag)), Const(1)));
+ }
+ }
+
+ context.UpdateArmFpMode();
+ }
+
+ private static void EmitSetFpsr(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ context.ClearQcFlagIfModified();
+
+ Operand fpsr = GetIntOrZR(context, op.Rt);
+ fpsr = context.ConvertI64ToI32(fpsr);
+
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSR.Mask.HasFlag((FPSR)(1u << flag)))
+ {
+ SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpsr, Const(flag)), Const(1)));
+ }
+ }
+
+ context.UpdateArmFpMode();
+ }
+
+ private static void EmitSetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+ Operand value = GetIntOrZR(context, op.Rt);
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ context.Store(context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())), value);
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSystem32.cs b/src/ARMeilleure/Instructions/InstEmitSystem32.cs
new file mode 100644
index 0000000..74d6169
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSystem32.cs
@@ -0,0 +1,338 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Reflection;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void Mcr(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ if (op.Coproc != 15 || op.Opc1 != 0)
+ {
+ InstEmit.Und(context);
+
+ return;
+ }
+
+ switch (op.CRn)
+ {
+ case 13: // Process and Thread Info.
+ if (op.CRm != 0)
+ {
+ throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ switch (op.Opc2)
+ {
+ case 2:
+ EmitSetTpidrEl0(context);
+ return;
+
+ default:
+ throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ case 7:
+ switch (op.CRm) // Cache and Memory barrier.
+ {
+ case 10:
+ switch (op.Opc2)
+ {
+ case 5: // Data Memory Barrier Register.
+ return; // No-op.
+
+ default:
+ throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16} (0x{op.RawOpCode:X}).");
+ }
+
+ default:
+ throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16} (0x{op.RawOpCode:X}).");
+ }
+
+ default:
+ throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+ }
+ }
+
+ public static void Mrc(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ if (op.Coproc != 15 || op.Opc1 != 0)
+ {
+ InstEmit.Und(context);
+
+ return;
+ }
+
+ Operand result;
+
+ switch (op.CRn)
+ {
+ case 13: // Process and Thread Info.
+ if (op.CRm != 0)
+ {
+ throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+ }
+
+ result = op.Opc2 switch
+ {
+ 2 => EmitGetTpidrEl0(context),
+ 3 => EmitGetTpidrroEl0(context),
+ _ => throw new NotImplementedException(
+ $"Unknown MRC Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X})."),
+ };
+
+ break;
+
+ default:
+ throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+ }
+
+ if (op.Rt == RegisterAlias.Aarch32Pc)
+ {
+ // Special behavior: copy NZCV flags into APSR.
+ EmitSetNzcv(context, result);
+
+ return;
+ }
+ else
+ {
+ SetIntA32(context, op.Rt, result);
+ }
+ }
+
+ public static void Mrrc(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ if (op.Coproc != 15)
+ {
+ InstEmit.Und(context);
+
+ return;
+ }
+
+ int opc = op.MrrcOp;
+ MethodInfo info = op.CRm switch
+ {
+ // Timer.
+ 14 => opc switch
+ {
+ 0 => typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0)),
+ _ => throw new NotImplementedException($"Unknown MRRC Opc1 0x{opc:X} at 0x{op.Address:X} (0x{op.RawOpCode:X})."),
+ },
+ _ => throw new NotImplementedException($"Unknown MRRC 0x{op.RawOpCode:X} at 0x{op.Address:X}."),
+ };
+ Operand result = context.Call(info);
+
+ SetIntA32(context, op.Rt, context.ConvertI64ToI32(result));
+ SetIntA32(context, op.CRn, context.ConvertI64ToI32(context.ShiftRightUI(result, Const(32))));
+ }
+
+ public static void Mrs(ArmEmitterContext context)
+ {
+ OpCode32Mrs op = (OpCode32Mrs)context.CurrOp;
+
+ if (op.R)
+ {
+ throw new NotImplementedException("SPSR");
+ }
+ else
+ {
+ Operand spsr = context.ShiftLeft(GetFlag(PState.VFlag), Const((int)PState.VFlag));
+ spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.CFlag), Const((int)PState.CFlag)));
+ spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.ZFlag), Const((int)PState.ZFlag)));
+ spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.NFlag), Const((int)PState.NFlag)));
+ spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.QFlag), Const((int)PState.QFlag)));
+
+ // TODO: Remaining flags.
+
+ SetIntA32(context, op.Rd, spsr);
+ }
+ }
+
+ public static void Msr(ArmEmitterContext context)
+ {
+ OpCode32MsrReg op = (OpCode32MsrReg)context.CurrOp;
+
+ if (op.R)
+ {
+ throw new NotImplementedException("SPSR");
+ }
+ else
+ {
+ if ((op.Mask & 8) != 0)
+ {
+ Operand value = GetIntA32(context, op.Rn);
+
+ EmitSetNzcv(context, value);
+
+ Operand q = context.BitwiseAnd(context.ShiftRightUI(value, Const((int)PState.QFlag)), Const(1));
+
+ SetFlag(context, PState.QFlag, q);
+ }
+
+ if ((op.Mask & 4) != 0)
+ {
+ throw new NotImplementedException("APSR_g");
+ }
+
+ if ((op.Mask & 2) != 0)
+ {
+ throw new NotImplementedException("CPSR_x");
+ }
+
+ if ((op.Mask & 1) != 0)
+ {
+ throw new NotImplementedException("CPSR_c");
+ }
+ }
+ }
+
+ public static void Nop(ArmEmitterContext context) { }
+
+ public static void Vmrs(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ if (op.Rt == RegisterAlias.Aarch32Pc && op.Sreg == 0b0001)
+ {
+ // Special behavior: copy NZCV flags into APSR.
+ SetFlag(context, PState.VFlag, GetFpFlag(FPState.VFlag));
+ SetFlag(context, PState.CFlag, GetFpFlag(FPState.CFlag));
+ SetFlag(context, PState.ZFlag, GetFpFlag(FPState.ZFlag));
+ SetFlag(context, PState.NFlag, GetFpFlag(FPState.NFlag));
+
+ return;
+ }
+
+ switch (op.Sreg)
+ {
+ case 0b0000: // FPSID
+ throw new NotImplementedException("Supervisor Only");
+ case 0b0001: // FPSCR
+ EmitGetFpscr(context);
+ return;
+ case 0b0101: // MVFR2
+ throw new NotImplementedException("MVFR2");
+ case 0b0110: // MVFR1
+ throw new NotImplementedException("MVFR1");
+ case 0b0111: // MVFR0
+ throw new NotImplementedException("MVFR0");
+ case 0b1000: // FPEXC
+ throw new NotImplementedException("Supervisor Only");
+ default:
+ throw new NotImplementedException($"Unknown VMRS 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+ }
+ }
+
+ public static void Vmsr(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ switch (op.Sreg)
+ {
+ case 0b0000: // FPSID
+ throw new NotImplementedException("Supervisor Only");
+ case 0b0001: // FPSCR
+ EmitSetFpscr(context);
+ return;
+ case 0b0101: // MVFR2
+ throw new NotImplementedException("MVFR2");
+ case 0b0110: // MVFR1
+ throw new NotImplementedException("MVFR1");
+ case 0b0111: // MVFR0
+ throw new NotImplementedException("MVFR0");
+ case 0b1000: // FPEXC
+ throw new NotImplementedException("Supervisor Only");
+ default:
+ throw new NotImplementedException($"Unknown VMSR 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+ }
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context, Operand t)
+ {
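+ // Each flag sits at its architectural APSR bit position in the source value; extract them one bit at a time.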
+ Operand v = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.VFlag)), Const(1));
+ Operand c = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.CFlag)), Const(1));
+ Operand z = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.ZFlag)), Const(1));
+ Operand n = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.NFlag)), Const(1));
+
+ SetFlag(context, PState.VFlag, v);
+ SetFlag(context, PState.CFlag, c);
+ SetFlag(context, PState.ZFlag, z);
+ SetFlag(context, PState.NFlag, n);
+ }
+
+ private static void EmitGetFpscr(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ Operand fpscr = Const(0);
+
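+ // Assemble FPSCR bit by bit from the individually stored FP flags; only bits covered by FPSCR.Mask are tracked.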
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSCR.Mask.HasFlag((FPSCR)(1u << flag)))
+ {
+ fpscr = context.BitwiseOr(fpscr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag)));
+ }
+ }
+
+ SetIntA32(context, op.Rt, fpscr);
+ }
+
+ private static void EmitSetFpscr(ArmEmitterContext context)
+ {
+ OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp;
+
+ Operand fpscr = GetIntA32(context, op.Rt);
+
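+ // Scatter each masked FPSCR bit back into its per-flag storage, then update the emulated FP mode to match.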
+ for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+ {
+ if (FPSCR.Mask.HasFlag((FPSCR)(1u << flag)))
+ {
+ SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpscr, Const(flag)), Const(1)));
+ }
+ }
+
+ context.UpdateArmFpMode();
+ }
+
+ private static Operand EmitGetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ return context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())));
+ }
+
+ private static Operand EmitGetTpidrroEl0(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ return context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrroEl0Offset())));
+ }
+
+ private static void EmitSetTpidrEl0(ArmEmitterContext context)
+ {
+ OpCode32System op = (OpCode32System)context.CurrOp;
+
+ Operand value = GetIntA32(context, op.Rt);
+
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+ context.Store(context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())), context.ZeroExtend32(OperandType.I64, value));
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/InstName.cs b/src/ARMeilleure/Instructions/InstName.cs
new file mode 100644
index 0000000..74c3315
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstName.cs
@@ -0,0 +1,699 @@
+namespace ARMeilleure.Instructions
+{
+ enum InstName
+ {
+ // Base (AArch64)
+ Adc,
+ Adcs,
+ Add,
+ Adds,
+ Adr,
+ Adrp,
+ And,
+ Ands,
+ Asrv,
+ B,
+ B_Cond,
+ Bfm,
+ Bic,
+ Bics,
+ Bl,
+ Blr,
+ Br,
+ Brk,
+ Cbnz,
+ Cbz,
+ Ccmn,
+ Ccmp,
+ Clrex,
+ Cls,
+ Clz,
+ Crc32b,
+ Crc32h,
+ Crc32w,
+ Crc32x,
+ Crc32cb,
+ Crc32ch,
+ Crc32cw,
+ Crc32cx,
+ Csdb,
+ Csel,
+ Csinc,
+ Csinv,
+ Csneg,
+ Dmb,
+ Dsb,
+ Eon,
+ Eor,
+ Esb,
+ Extr,
+ Hint,
+ Isb,
+ It,
+ Ldar,
+ Ldaxp,
+ Ldaxr,
+ Ldp,
+ Ldr,
+ Ldr_Literal,
+ Ldrs,
+ Ldxr,
+ Ldxp,
+ Lslv,
+ Lsrv,
+ Madd,
+ Movk,
+ Movn,
+ Movz,
+ Mrs,
+ Msr,
+ Msub,
+ Nop,
+ Orn,
+ Orr,
+ Prfm,
+ Rbit,
+ Ret,
+ Rev16,
+ Rev32,
+ Rev64,
+ Rorv,
+ Sbc,
+ Sbcs,
+ Sbfm,
+ Sdiv,
+ Sel,
+ Sev,
+ Sevl,
+ Shsub8,
+ Smaddl,
+ Smsubl,
+ Smulh,
+ Smull,
+ Smulw_,
+ Ssat,
+ Ssat16,
+ Stlr,
+ Stlxp,
+ Stlxr,
+ Stp,
+ Str,
+ Stxp,
+ Stxr,
+ Sub,
+ Subs,
+ Svc,
+ Sxtb,
+ Sxth,
+ Sys,
+ Tbnz,
+ Tbz,
+ Tsb,
+ Ubfm,
+ Udiv,
+ Umaddl,
+ Umsubl,
+ Umulh,
+ Und,
+ Wfe,
+ Wfi,
+ Yield,
+
+ // FP & SIMD (AArch64)
+ Abs_S,
+ Abs_V,
+ Add_S,
+ Add_V,
+ Addhn_V,
+ Addp_S,
+ Addp_V,
+ Addv_V,
+ Aesd_V,
+ Aese_V,
+ Aesimc_V,
+ Aesmc_V,
+ And_V,
+ Bic_V,
+ Bic_Vi,
+ Bif_V,
+ Bit_V,
+ Bsl_V,
+ Cls_V,
+ Clz_V,
+ Cmeq_S,
+ Cmeq_V,
+ Cmge_S,
+ Cmge_V,
+ Cmgt_S,
+ Cmgt_V,
+ Cmhi_S,
+ Cmhi_V,
+ Cmhs_S,
+ Cmhs_V,
+ Cmle_S,
+ Cmle_V,
+ Cmlt_S,
+ Cmlt_V,
+ Cmtst_S,
+ Cmtst_V,
+ Cnt_V,
+ Dup_Gp,
+ Dup_S,
+ Dup_V,
+ Eor_V,
+ Ext_V,
+ Fabd_S,
+ Fabd_V,
+ Fabs_S,
+ Fabs_V,
+ Facge_S,
+ Facge_V,
+ Facgt_S,
+ Facgt_V,
+ Fadd_S,
+ Fadd_V,
+ Faddp_S,
+ Faddp_V,
+ Fccmp_S,
+ Fccmpe_S,
+ Fcmeq_S,
+ Fcmeq_V,
+ Fcmge_S,
+ Fcmge_V,
+ Fcmgt_S,
+ Fcmgt_V,
+ Fcmle_S,
+ Fcmle_V,
+ Fcmlt_S,
+ Fcmlt_V,
+ Fcmp_S,
+ Fcmpe_S,
+ Fcsel_S,
+ Fcvt_S,
+ Fcvtas_Gp,
+ Fcvtas_S,
+ Fcvtas_V,
+ Fcvtau_Gp,
+ Fcvtau_S,
+ Fcvtau_V,
+ Fcvtl_V,
+ Fcvtms_Gp,
+ Fcvtms_V,
+ Fcvtmu_Gp,
+ Fcvtn_V,
+ Fcvtns_Gp,
+ Fcvtns_S,
+ Fcvtns_V,
+ Fcvtnu_S,
+ Fcvtnu_V,
+ Fcvtps_Gp,
+ Fcvtpu_Gp,
+ Fcvtzs_Gp,
+ Fcvtzs_Gp_Fixed,
+ Fcvtzs_S,
+ Fcvtzs_V,
+ Fcvtzs_V_Fixed,
+ Fcvtzu_Gp,
+ Fcvtzu_Gp_Fixed,
+ Fcvtzu_S,
+ Fcvtzu_V,
+ Fcvtzu_V_Fixed,
+ Fdiv_S,
+ Fdiv_V,
+ Fmadd_S,
+ Fmax_S,
+ Fmax_V,
+ Fmaxnm_S,
+ Fmaxnm_V,
+ Fmaxnmp_S,
+ Fmaxnmp_V,
+ Fmaxnmv_V,
+ Fmaxp_S,
+ Fmaxp_V,
+ Fmaxv_V,
+ Fmin_S,
+ Fmin_V,
+ Fminnm_S,
+ Fminnm_V,
+ Fminnmp_S,
+ Fminnmp_V,
+ Fminnmv_V,
+ Fminp_S,
+ Fminp_V,
+ Fminv_V,
+ Fmla_Se,
+ Fmla_V,
+ Fmla_Ve,
+ Fmls_Se,
+ Fmls_V,
+ Fmls_Ve,
+ Fmov_S,
+ Fmov_Si,
+ Fmov_Vi,
+ Fmov_Ftoi,
+ Fmov_Itof,
+ Fmov_Ftoi1,
+ Fmov_Itof1,
+ Fmsub_S,
+ Fmul_S,
+ Fmul_Se,
+ Fmul_V,
+ Fmul_Ve,
+ Fmulx_S,
+ Fmulx_Se,
+ Fmulx_V,
+ Fmulx_Ve,
+ Fneg_S,
+ Fneg_V,
+ Fnmadd_S,
+ Fnmsub_S,
+ Fnmul_S,
+ Frecpe_S,
+ Frecpe_V,
+ Frecps_S,
+ Frecps_V,
+ Frecpx_S,
+ Frinta_S,
+ Frinta_V,
+ Frinti_S,
+ Frinti_V,
+ Frintm_S,
+ Frintm_V,
+ Frintn_S,
+ Frintn_V,
+ Frintp_S,
+ Frintp_V,
+ Frintx_S,
+ Frintx_V,
+ Frintz_S,
+ Frintz_V,
+ Frsqrte_S,
+ Frsqrte_V,
+ Frsqrts_S,
+ Frsqrts_V,
+ Fsqrt_S,
+ Fsqrt_V,
+ Fsub_S,
+ Fsub_V,
+ Ins_Gp,
+ Ins_V,
+ Ld__Vms,
+ Ld__Vss,
+ Mla_V,
+ Mla_Ve,
+ Mls_V,
+ Mls_Ve,
+ Movi_V,
+ Mul_V,
+ Mul_Ve,
+ Mvni_V,
+ Neg_S,
+ Neg_V,
+ Not_V,
+ Orn_V,
+ Orr_V,
+ Orr_Vi,
+ Pmull_V,
+ Raddhn_V,
+ Rbit_V,
+ Rev16_V,
+ Rev32_V,
+ Rev64_V,
+ Rshrn_V,
+ Rsubhn_V,
+ Saba_V,
+ Sabal_V,
+ Sabd_V,
+ Sabdl_V,
+ Sadalp_V,
+ Saddl_V,
+ Saddlp_V,
+ Saddlv_V,
+ Saddw_V,
+ Scvtf_Gp,
+ Scvtf_Gp_Fixed,
+ Scvtf_S,
+ Scvtf_S_Fixed,
+ Scvtf_V,
+ Scvtf_V_Fixed,
+ Sha1c_V,
+ Sha1h_V,
+ Sha1m_V,
+ Sha1p_V,
+ Sha1su0_V,
+ Sha1su1_V,
+ Sha256h_V,
+ Sha256h2_V,
+ Sha256su0_V,
+ Sha256su1_V,
+ Shadd_V,
+ Shl_S,
+ Shl_V,
+ Shll_V,
+ Shrn_V,
+ Shsub_V,
+ Sli_S,
+ Sli_V,
+ Smax_V,
+ Smaxp_V,
+ Smaxv_V,
+ Smin_V,
+ Sminp_V,
+ Sminv_V,
+ Smlal_V,
+ Smlal_Ve,
+ Smlsl_V,
+ Smlsl_Ve,
+ Smov_S,
+ Smull_V,
+ Smull_Ve,
+ Sqabs_S,
+ Sqabs_V,
+ Sqadd_S,
+ Sqadd_V,
+ Sqdmulh_S,
+ Sqdmulh_V,
+ Sqdmulh_Ve,
+ Sqneg_S,
+ Sqneg_V,
+ Sqrdmulh_S,
+ Sqrdmulh_V,
+ Sqrdmulh_Ve,
+ Sqrshl_V,
+ Sqrshrn_S,
+ Sqrshrn_V,
+ Sqrshrun_S,
+ Sqrshrun_V,
+ Sqshl_Si,
+ Sqshl_V,
+ Sqshl_Vi,
+ Sqshrn_S,
+ Sqshrn_V,
+ Sqshrun_S,
+ Sqshrun_V,
+ Sqsub_S,
+ Sqsub_V,
+ Sqxtn_S,
+ Sqxtn_V,
+ Sqxtun_S,
+ Sqxtun_V,
+ Srhadd_V,
+ Sri_S,
+ Sri_V,
+ Srshl_V,
+ Srshr_S,
+ Srshr_V,
+ Srsra_S,
+ Srsra_V,
+ Sshl_S,
+ Sshl_V,
+ Sshll_V,
+ Sshr_S,
+ Sshr_V,
+ Ssra_S,
+ Ssra_V,
+ Ssubl_V,
+ Ssubw_V,
+ St__Vms,
+ St__Vss,
+ Sub_S,
+ Sub_V,
+ Subhn_V,
+ Suqadd_S,
+ Suqadd_V,
+ Tbl_V,
+ Tbx_V,
+ Trn1_V,
+ Trn2_V,
+ Uaba_V,
+ Uabal_V,
+ Uabd_V,
+ Uabdl_V,
+ Uadalp_V,
+ Uaddl_V,
+ Uaddlp_V,
+ Uaddlv_V,
+ Uaddw_V,
+ Ucvtf_Gp,
+ Ucvtf_Gp_Fixed,
+ Ucvtf_S,
+ Ucvtf_S_Fixed,
+ Ucvtf_V,
+ Ucvtf_V_Fixed,
+ Uhadd_V,
+ Uhsub_V,
+ Umax_V,
+ Umaxp_V,
+ Umaxv_V,
+ Umin_V,
+ Uminp_V,
+ Uminv_V,
+ Umlal_V,
+ Umlal_Ve,
+ Umlsl_V,
+ Umlsl_Ve,
+ Umov_S,
+ Umull_V,
+ Umull_Ve,
+ Uqadd_S,
+ Uqadd_V,
+ Uqrshl_V,
+ Uqrshrn_S,
+ Uqrshrn_V,
+ Uqshl_V,
+ Uqshrn_S,
+ Uqshrn_V,
+ Uqsub_S,
+ Uqsub_V,
+ Uqxtn_S,
+ Uqxtn_V,
+ Urhadd_V,
+ Urshl_V,
+ Urshr_S,
+ Urshr_V,
+ Ursra_S,
+ Ursra_V,
+ Ushl_S,
+ Ushl_V,
+ Ushll_V,
+ Ushr_S,
+ Ushr_V,
+ Usqadd_S,
+ Usqadd_V,
+ Usra_S,
+ Usra_V,
+ Usubl_V,
+ Usubw_V,
+ Uzp1_V,
+ Uzp2_V,
+ Xtn_V,
+ Zip1_V,
+ Zip2_V,
+
+ // Base (AArch32)
+ Bfc,
+ Bfi,
+ Blx,
+ Bx,
+ Cmp,
+ Cmn,
+ Movt,
+ Mul,
+ Lda,
+ Ldab,
+ Ldaex,
+ Ldaexb,
+ Ldaexd,
+ Ldaexh,
+ Ldah,
+ Ldm,
+ Ldrb,
+ Ldrd,
+ Ldrex,
+ Ldrexb,
+ Ldrexd,
+ Ldrexh,
+ Ldrh,
+ Ldrsb,
+ Ldrsh,
+ Mcr,
+ Mla,
+ Mls,
+ Mov,
+ Mrc,
+ Mrrc,
+ Mvn,
+ Pkh,
+ Pld,
+ Pop,
+ Push,
+ Qadd16,
+ Rev,
+ Revsh,
+ Rsb,
+ Rsc,
+ Sadd8,
+ Sbfx,
+ Shadd8,
+ Smla__,
+ Smlal,
+ Smlal__,
+ Smlaw_,
+ Smmla,
+ Smmls,
+ Smul__,
+ Smmul,
+ Ssub8,
+ Stl,
+ Stlb,
+ Stlex,
+ Stlexb,
+ Stlexd,
+ Stlexh,
+ Stlh,
+ Stm,
+ Strb,
+ Strd,
+ Strex,
+ Strexb,
+ Strexd,
+ Strexh,
+ Strh,
+ Sxtb16,
+ Tbb,
+ Tbh,
+ Teq,
+ Trap,
+ Tst,
+ Uadd8,
+ Ubfx,
+ Uhadd8,
+ Uhsub8,
+ Umaal,
+ Umlal,
+ Umull,
+ Uqadd16,
+ Uqadd8,
+ Uqsub16,
+ Uqsub8,
+ Usat,
+ Usat16,
+ Usub8,
+ Uxtb,
+ Uxtb16,
+ Uxth,
+
+ // FP & SIMD (AArch32)
+ Vabd,
+ Vabdl,
+ Vabs,
+ Vadd,
+ Vaddl,
+ Vaddw,
+ Vand,
+ Vbic,
+ Vbif,
+ Vbit,
+ Vbsl,
+ Vceq,
+ Vcge,
+ Vcgt,
+ Vcle,
+ Vclt,
+ Vcmp,
+ Vcmpe,
+ Vcnt,
+ Vcvt,
+ Vdiv,
+ Vdup,
+ Veor,
+ Vext,
+ Vfma,
+ Vfms,
+ Vfnma,
+ Vfnms,
+ Vhadd,
+ Vld1,
+ Vld2,
+ Vld3,
+ Vld4,
+ Vldm,
+ Vldr,
+ Vmax,
+ Vmaxnm,
+ Vmin,
+ Vminnm,
+ Vmla,
+ Vmlal,
+ Vmls,
+ Vmlsl,
+ Vmov,
+ Vmovl,
+ Vmovn,
+ Vmrs,
+ Vmsr,
+ Vmul,
+ Vmull,
+ Vmvn,
+ Vneg,
+ Vnmul,
+ Vnmla,
+ Vnmls,
+ Vorn,
+ Vorr,
+ Vpadd,
+ Vpadal,
+ Vpaddl,
+ Vpmax,
+ Vpmin,
+ Vqadd,
+ Vqdmulh,
+ Vqmovn,
+ Vqmovun,
+ Vqrdmulh,
+ Vqrshrn,
+ Vqrshrun,
+ Vqshrn,
+ Vqshrun,
+ Vqsub,
+ Vrev,
+ Vrhadd,
+ Vrint,
+ Vrinta,
+ Vrintm,
+ Vrintn,
+ Vrintp,
+ Vrintr,
+ Vrintx,
+ Vrshr,
+ Vrshrn,
+ Vsel,
+ Vshl,
+ Vshll,
+ Vshr,
+ Vshrn,
+ Vsli,
+ Vst1,
+ Vst2,
+ Vst3,
+ Vst4,
+ Vstm,
+ Vstr,
+ Vsqrt,
+ Vrecpe,
+ Vrecps,
+ Vrsqrte,
+ Vrsqrts,
+ Vrsra,
+ Vsra,
+ Vsub,
+ Vsubl,
+ Vsubw,
+ Vswp,
+ Vtbl,
+ Vtrn,
+ Vtst,
+ Vuzp,
+ Vzip,
+ }
+}
diff --git a/src/ARMeilleure/Instructions/NativeInterface.cs b/src/ARMeilleure/Instructions/NativeInterface.cs
new file mode 100644
index 0000000..0cd3754
--- /dev/null
+++ b/src/ARMeilleure/Instructions/NativeInterface.cs
@@ -0,0 +1,195 @@
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class NativeInterface
+ {
+ private class ThreadContext
+ {
+ public ExecutionContext Context { get; }
+ public IMemoryManager Memory { get; }
+ public Translator Translator { get; }
+
+ public ThreadContext(ExecutionContext context, IMemoryManager memory, Translator translator)
+ {
+ Context = context;
+ Memory = memory;
+ Translator = translator;
+ }
+ }
+
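+ // One context per emulated thread; translated code calls back into this class on the thread that registered it.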
+ [ThreadStatic]
+ private static ThreadContext Context;
+
+ public static void RegisterThread(ExecutionContext context, IMemoryManager memory, Translator translator)
+ {
+ Context = new ThreadContext(context, memory, translator);
+ }
+
+ public static void UnregisterThread()
+ {
+ Context = null;
+ }
+
+ public static void Break(ulong address, int imm)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnBreak(address, imm);
+
+ Statistics.ResumeTimer();
+ }
+
+ public static void SupervisorCall(ulong address, int imm)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnSupervisorCall(address, imm);
+
+ Statistics.ResumeTimer();
+ }
+
+ public static void Undefined(ulong address, int opCode)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnUndefined(address, opCode);
+
+ Statistics.ResumeTimer();
+ }
+
+ #region "System registers"
+ public static ulong GetCtrEl0()
+ {
+ return GetContext().CtrEl0;
+ }
+
+ public static ulong GetDczidEl0()
+ {
+ return GetContext().DczidEl0;
+ }
+
+ public static ulong GetCntfrqEl0()
+ {
+ return GetContext().CntfrqEl0;
+ }
+
+ public static ulong GetCntpctEl0()
+ {
+ return GetContext().CntpctEl0;
+ }
+
+ public static ulong GetCntvctEl0()
+ {
+ return GetContext().CntvctEl0;
+ }
+ #endregion
+
+ #region "Read"
+ public static byte ReadByte(ulong address)
+ {
+ return GetMemoryManager().ReadGuest<byte>(address);
+ }
+
+ public static ushort ReadUInt16(ulong address)
+ {
+ return GetMemoryManager().ReadGuest<ushort>(address);
+ }
+
+ public static uint ReadUInt32(ulong address)
+ {
+ return GetMemoryManager().ReadGuest<uint>(address);
+ }
+
+ public static ulong ReadUInt64(ulong address)
+ {
+ return GetMemoryManager().ReadGuest<ulong>(address);
+ }
+
+ public static V128 ReadVector128(ulong address)
+ {
+ return GetMemoryManager().ReadGuest<V128>(address);
+ }
+ #endregion
+
+ #region "Write"
+ public static void WriteByte(ulong address, byte value)
+ {
+ GetMemoryManager().WriteGuest(address, value);
+ }
+
+ public static void WriteUInt16(ulong address, ushort value)
+ {
+ GetMemoryManager().WriteGuest(address, value);
+ }
+
+ public static void WriteUInt32(ulong address, uint value)
+ {
+ GetMemoryManager().WriteGuest(address, value);
+ }
+
+ public static void WriteUInt64(ulong address, ulong value)
+ {
+ GetMemoryManager().WriteGuest(address, value);
+ }
+
+ public static void WriteVector128(ulong address, V128 value)
+ {
+ GetMemoryManager().WriteGuest(address, value);
+ }
+ #endregion
+
+ public static void EnqueueForRejit(ulong address)
+ {
+ Context.Translator.EnqueueForRejit(address, GetContext().ExecutionMode);
+ }
+
+ public static void SignalMemoryTracking(ulong address, ulong size, bool write)
+ {
+ GetMemoryManager().SignalMemoryTracking(address, size, write);
+ }
+
+ public static void ThrowInvalidMemoryAccess(ulong address)
+ {
+ throw new InvalidAccessException(address);
+ }
+
+ public static ulong GetFunctionAddress(ulong address)
+ {
+ TranslatedFunction function = Context.Translator.GetOrTranslate(address, GetContext().ExecutionMode);
+
+ return (ulong)function.FuncPointer.ToInt64();
+ }
+
+ public static void InvalidateCacheLine(ulong address)
+ {
+ Context.Translator.InvalidateJitCacheRegion(address, InstEmit.DczSizeInBytes);
+ }
+
+ public static bool CheckSynchronization()
+ {
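+ // Called at synchronization points: services any pending interrupt and reports whether the thread should keep running.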
+ Statistics.PauseTimer();
+
+ ExecutionContext context = GetContext();
+
+ context.CheckInterrupt();
+
+ Statistics.ResumeTimer();
+
+ return context.Running;
+ }
+
+ public static ExecutionContext GetContext()
+ {
+ return Context.Context;
+ }
+
+ public static IMemoryManager GetMemoryManager()
+ {
+ return Context.Memory;
+ }
+ }
+}
diff --git a/src/ARMeilleure/Instructions/SoftFallback.cs b/src/ARMeilleure/Instructions/SoftFallback.cs
new file mode 100644
index 0000000..c4fe677
--- /dev/null
+++ b/src/ARMeilleure/Instructions/SoftFallback.cs
@@ -0,0 +1,648 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class SoftFallback
+ {
+ #region "ShrImm64"
+ public static long SignedShrImm64(long value, long roundConst, int shift)
+ {
+ if (roundConst == 0L)
+ {
+ if (shift <= 63)
+ {
+ return value >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ if (value < 0L)
+ {
+ return -1L;
+ }
+ else /* if (value >= 0L) */
+ {
+ return 0L;
+ }
+ }
+ }
+ else /* if (roundConst == 1L << (shift - 1)) */
+ {
+ if (shift <= 63)
+ {
+ long add = value + roundConst;
+
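+ // (~value & (value ^ add)) < 0 detects signed overflow of the rounding add (value >= 0 but add wrapped negative); shift as unsigned to preserve the carried-out bit.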
+ if ((~value & (value ^ add)) < 0L)
+ {
+ return (long)((ulong)add >> shift);
+ }
+ else
+ {
+ return add >> shift;
+ }
+ }
+ else /* if (shift == 64) */
+ {
+ return 0L;
+ }
+ }
+ }
+
+ public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
+ {
+ if (roundConst == 0L)
+ {
+ if (shift <= 63)
+ {
+ return value >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ return 0UL;
+ }
+ }
+ else /* if (roundConst == 1L << (shift - 1)) */
+ {
+ ulong add = value + (ulong)roundConst;
+
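+ // If the rounding add carries out of 64 bits, reinsert the carry at bit (64 - shift) of the shifted result.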
+ if ((add < value) && (add < (ulong)roundConst))
+ {
+ if (shift <= 63)
+ {
+ return (add >> shift) | (0x8000000000000000UL >> (shift - 1));
+ }
+ else /* if (shift == 64) */
+ {
+ return 1UL;
+ }
+ }
+ else
+ {
+ if (shift <= 63)
+ {
+ return add >> shift;
+ }
+ else /* if (shift == 64) */
+ {
+ return 0UL;
+ }
+ }
+ }
+ }
+ #endregion
+
+ #region "Saturation"
+ public static int SatF32ToS32(float value)
+ {
+ if (float.IsNaN(value))
+ {
+ return 0;
+ }
+
+ return value >= int.MaxValue ? int.MaxValue :
+ value <= int.MinValue ? int.MinValue : (int)value;
+ }
+
+ public static long SatF32ToS64(float value)
+ {
+ if (float.IsNaN(value))
+ {
+ return 0;
+ }
+
+ return value >= long.MaxValue ? long.MaxValue :
+ value <= long.MinValue ? long.MinValue : (long)value;
+ }
+
+ public static uint SatF32ToU32(float value)
+ {
+ if (float.IsNaN(value))
+ {
+ return 0;
+ }
+
+ return value >= uint.MaxValue ? uint.MaxValue :
+ value <= uint.MinValue ? uint.MinValue : (uint)value;
+ }
+
+ public static ulong SatF32ToU64(float value)
+ {
+ if (float.IsNaN(value))
+ {
+ return 0;
+ }
+
+ return value >= ulong.MaxValue ? ulong.MaxValue :
+ value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
+ }
+
+ public static int SatF64ToS32(double value)
+ {
+ if (double.IsNaN(value))
+ {
+ return 0;
+ }
+
+ return value >= int.MaxValue ? int.MaxValue :
+ value <= int.MinValue ? int.MinValue : (int)value;
+ }
+
+ public static long SatF64ToS64(double value)
+ {
+ if (double.IsNaN(value))
+ {
+ return 0;
+ }
+
+ return value >= long.MaxValue ? long.MaxValue :
+ value <= long.MinValue ? long.MinValue : (long)value;
+ }
+
+ public static uint SatF64ToU32(double value)
+ {
+ if (double.IsNaN(value))
+ {
+ return 0;
+ }
+
+ return value >= uint.MaxValue ? uint.MaxValue :
+ value <= uint.MinValue ? uint.MinValue : (uint)value;
+ }
+
+ public static ulong SatF64ToU64(double value)
+ {
+ if (double.IsNaN(value))
+ {
+ return 0;
+ }
+
+ return value >= ulong.MaxValue ? ulong.MaxValue :
+ value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
+ }
+ #endregion
+
+ #region "Count"
+ public static ulong CountLeadingSigns(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+ {
+ value ^= value >> 1;
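+ // After XOR with a one-bit shift, a bit is set exactly where the value differs from its left neighbor, so the scan below finds the first bit that differs from the sign bit.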
+
+ int highBit = size - 2;
+
+ for (int bit = highBit; bit >= 0; bit--)
+ {
+ if (((int)(value >> bit) & 0b1) != 0)
+ {
+ return (ulong)(highBit - bit);
+ }
+ }
+
+ return (ulong)(size - 1);
+ }
+
+ private static ReadOnlySpan<byte> ClzNibbleTbl => new byte[] { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+ public static ulong CountLeadingZeros(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+ {
+ if (value == 0ul)
+ {
+ return (ulong)size;
+ }
+
+ int nibbleIdx = size;
+ int preCount, count = 0;
+
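+ // Scan from the most significant nibble down using the 16-entry lookup table; stop at the first nibble with a set bit.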
+ do
+ {
+ nibbleIdx -= 4;
+ preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111];
+ count += preCount;
+ }
+ while (preCount == 4);
+
+ return (ulong)count;
+ }
+ #endregion
+
+ #region "Table"
+ public static V128 Tbl1(V128 vector, int bytes, V128 tb0)
+ {
+ return TblOrTbx(default, vector, bytes, tb0);
+ }
+
+ public static V128 Tbl2(V128 vector, int bytes, V128 tb0, V128 tb1)
+ {
+ return TblOrTbx(default, vector, bytes, tb0, tb1);
+ }
+
+ public static V128 Tbl3(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2)
+ {
+ return TblOrTbx(default, vector, bytes, tb0, tb1, tb2);
+ }
+
+ public static V128 Tbl4(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
+ {
+ return TblOrTbx(default, vector, bytes, tb0, tb1, tb2, tb3);
+ }
+
+ public static V128 Tbx1(V128 dest, V128 vector, int bytes, V128 tb0)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0);
+ }
+
+ public static V128 Tbx2(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0, tb1);
+ }
+
+ public static V128 Tbx3(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0, tb1, tb2);
+ }
+
+ public static V128 Tbx4(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
+ {
+ return TblOrTbx(dest, vector, bytes, tb0, tb1, tb2, tb3);
+ }
+
+ private static V128 TblOrTbx(V128 dest, V128 vector, int bytes, params V128[] tb)
+ {
+ byte[] res = new byte[16];
+
+ if (dest != default)
+ {
+ Buffer.BlockCopy(dest.ToArray(), 0, res, 0, bytes);
+ }
+
+ byte[] table = new byte[tb.Length * 16];
+
+ for (byte index = 0; index < tb.Length; index++)
+ {
+ Buffer.BlockCopy(tb[index].ToArray(), 0, table, index * 16, 16);
+ }
+
+ byte[] v = vector.ToArray();
+
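+ // Out-of-range indices leave the destination byte unchanged: zero for TBL (res starts cleared), the original dest byte for TBX.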
+ for (byte index = 0; index < bytes; index++)
+ {
+ byte tblIndex = v[index];
+
+ if (tblIndex < table.Length)
+ {
+ res[index] = table[tblIndex];
+ }
+ }
+
+ return new V128(res);
+ }
+ #endregion
+
+ #region "Crc32"
+ private const uint Crc32RevPoly = 0xedb88320;
+ private const uint Crc32cRevPoly = 0x82f63b78;
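+ // Bit-reversed polynomials for CRC-32 (IEEE 802.3) and CRC-32C (Castagnoli); bytes are processed LSB-first to match.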
+
+ public static uint Crc32b(uint crc, byte value) => Crc32(crc, Crc32RevPoly, value);
+ public static uint Crc32h(uint crc, ushort value) => Crc32h(crc, Crc32RevPoly, value);
+ public static uint Crc32w(uint crc, uint value) => Crc32w(crc, Crc32RevPoly, value);
+ public static uint Crc32x(uint crc, ulong value) => Crc32x(crc, Crc32RevPoly, value);
+
+ public static uint Crc32cb(uint crc, byte value) => Crc32(crc, Crc32cRevPoly, value);
+ public static uint Crc32ch(uint crc, ushort value) => Crc32h(crc, Crc32cRevPoly, value);
+ public static uint Crc32cw(uint crc, uint value) => Crc32w(crc, Crc32cRevPoly, value);
+ public static uint Crc32cx(uint crc, ulong value) => Crc32x(crc, Crc32cRevPoly, value);
+
+ private static uint Crc32h(uint crc, uint poly, ushort val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+
+ return crc;
+ }
+
+ private static uint Crc32w(uint crc, uint poly, uint val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+ crc = Crc32(crc, poly, (byte)(val >> 16));
+ crc = Crc32(crc, poly, (byte)(val >> 24));
+
+ return crc;
+ }
+
+ private static uint Crc32x(uint crc, uint poly, ulong val)
+ {
+ crc = Crc32(crc, poly, (byte)(val >> 0));
+ crc = Crc32(crc, poly, (byte)(val >> 8));
+ crc = Crc32(crc, poly, (byte)(val >> 16));
+ crc = Crc32(crc, poly, (byte)(val >> 24));
+ crc = Crc32(crc, poly, (byte)(val >> 32));
+ crc = Crc32(crc, poly, (byte)(val >> 40));
+ crc = Crc32(crc, poly, (byte)(val >> 48));
+ crc = Crc32(crc, poly, (byte)(val >> 56));
+
+ return crc;
+ }
+
+ private static uint Crc32(uint crc, uint poly, byte val)
+ {
+ crc ^= val;
+
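+ // mask is all-ones when the low bit of crc is set, so the reversed polynomial is XORed in without a branch.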
+ for (int bit = 7; bit >= 0; bit--)
+ {
+ uint mask = (uint)(-(int)(crc & 1));
+
+ crc = (crc >> 1) ^ (poly & mask);
+ }
+
+ return crc;
+ }
+ #endregion
+
+ #region "Aes"
+ public static V128 Decrypt(V128 value, V128 roundKey)
+ {
+ return CryptoHelper.AesInvSubBytes(CryptoHelper.AesInvShiftRows(value ^ roundKey));
+ }
+
+ public static V128 Encrypt(V128 value, V128 roundKey)
+ {
+ return CryptoHelper.AesSubBytes(CryptoHelper.AesShiftRows(value ^ roundKey));
+ }
+
+ public static V128 InverseMixColumns(V128 value)
+ {
+ return CryptoHelper.AesInvMixColumns(value);
+ }
+
+ public static V128 MixColumns(V128 value)
+ {
+ return CryptoHelper.AesMixColumns(value);
+ }
+ #endregion
+
+ #region "Sha1"
+ public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+ for (int e = 0; e <= 3; e++)
+ {
+ uint t = ShaChoose(hash_abcd.Extract<uint>(1),
+ hash_abcd.Extract<uint>(2),
+ hash_abcd.Extract