diff options
Diffstat (limited to 'data/x264/gas-preprocessor.pl')
-rwxr-xr-x | data/x264/gas-preprocessor.pl | 1180 |
1 files changed, 0 insertions, 1180 deletions
diff --git a/data/x264/gas-preprocessor.pl b/data/x264/gas-preprocessor.pl deleted file mode 100755 index 743ce45e7..000000000 --- a/data/x264/gas-preprocessor.pl +++ /dev/null @@ -1,1180 +0,0 @@ -#!/usr/bin/env perl -# by David Conrad -# This code is licensed under GPLv2 or later; go to gnu.org to read it -# (not that it much matters for an asm preprocessor) -# usage: set your assembler to be something like "perl gas-preprocessor.pl gcc" -use strict; - -# Apple's gas is ancient and doesn't support modern preprocessing features like -# .rept and has ugly macro syntax, among other things. Thus, this script -# implements the subset of the gas preprocessor used by x264 and ffmpeg -# that isn't supported by Apple's gas. - -my %canonical_arch = ("aarch64" => "aarch64", "arm64" => "aarch64", - "arm" => "arm", - "powerpc" => "powerpc", "ppc" => "powerpc"); - -my %comments = ("aarch64" => '//', - "arm" => '@', - "ppc" => '#', - "powerpc" => '#'); - -my @gcc_cmd; -my @preprocess_c_cmd; - -my $comm; -my $arch; -my $as_type = "apple-gas"; - -my $fix_unreq = $^O eq "darwin"; -my $force_thumb = 0; - -my $arm_cond_codes = "eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|hs|lo"; - -my $usage_str = " -$0\n -Gas-preprocessor.pl converts assembler files using modern GNU as syntax for -Apple's ancient gas version or clang's incompatible integrated assembler. The -conversion is regularly tested for FFmpeg, Libav, x264 and vlc. Other projects might -use different features which are not correctly handled. - -Options for this program needs to be separated with ' -- ' from the assembler -command. Following options are currently supported: - - -help - this usage text - -arch - target architecture - -as-type - one value out of {{,apple-}{gas,clang},armasm} - -fix-unreq - -no-fix-unreq - -force-thumb - assemble as thumb regardless of the input source - (note, this is incomplete and only works for sources - it explicitly was tested with) -"; - -sub usage() { - print $usage_str; -} - -while (@ARGV) { - my $opt = shift; - - if ($opt =~ /^-(no-)?fix-unreq$/) { - $fix_unreq = $1 ne "no-"; - } elsif ($opt eq "-force-thumb") { - $force_thumb = 1; - } elsif ($opt eq "-arch") { - $arch = shift; - die "unknown arch: '$arch'\n" if not exists $canonical_arch{$arch}; - } elsif ($opt eq "-as-type") { - $as_type = shift; - die "unknown as type: '$as_type'\n" if $as_type !~ /^((apple-)?(gas|clang|llvm_gcc)|armasm)$/; - } elsif ($opt eq "-help") { - usage(); - exit 0; - } elsif ($opt eq "--" ) { - @gcc_cmd = @ARGV; - } elsif ($opt =~ /^-/) { - die "option '$opt' is not known. See '$0 -help' for usage information\n"; - } else { - push @gcc_cmd, $opt, @ARGV; - } - last if (@gcc_cmd); -} - -if (grep /\.c$/, @gcc_cmd) { - # C file (inline asm?) - compile - @preprocess_c_cmd = (@gcc_cmd, "-S"); -} elsif (grep /\.[sS]$/, @gcc_cmd) { - # asm file, just do C preprocessor - @preprocess_c_cmd = (@gcc_cmd, "-E"); -} elsif (grep /-(v|h|-version|dumpversion)/, @gcc_cmd) { - # pass -v/--version along, used during probing. Matching '-v' might have - # uninteded results but it doesn't matter much if gas-preprocessor or - # the compiler fails. - exec(@gcc_cmd); -} else { - die "Unrecognized input filetype"; -} -if ($as_type eq "armasm") { - - $preprocess_c_cmd[0] = "cpp"; - push(@preprocess_c_cmd, "-undef"); - # Normally a preprocessor for windows would predefine _WIN32, - # but we're using any generic system-agnostic preprocessor "cpp" - # with -undef (to avoid getting predefined variables from the host - # system in cross compilation cases), so manually define it here. - push(@preprocess_c_cmd, "-D_WIN32"); - - @preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd; - # Remove -ignore XX parameter pairs from preprocess_c_cmd - my $index = 1; - while ($index < $#preprocess_c_cmd) { - if ($preprocess_c_cmd[$index] eq "-ignore" and $index + 1 < $#preprocess_c_cmd) { - splice(@preprocess_c_cmd, $index, 2); - next; - } - $index++; - } - if (grep /^-MM$/, @preprocess_c_cmd) { - system(@preprocess_c_cmd) == 0 or die "Error running preprocessor"; - exit 0; - } -} - -# if compiling, avoid creating an output file named '-.o' -if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) { - foreach my $i (@gcc_cmd) { - if ($i =~ /\.[csS]$/) { - my $outputfile = $i; - $outputfile =~ s/\.[csS]$/.o/; - push(@gcc_cmd, "-o"); - push(@gcc_cmd, $outputfile); - last; - } - } -} -# replace only the '-o' argument with '-', avoids rewriting the make dependency -# target specified with -MT to '-' -my $index = 1; -while ($index < $#preprocess_c_cmd) { - if ($preprocess_c_cmd[$index] eq "-o") { - $index++; - $preprocess_c_cmd[$index] = "-"; - } - $index++; -} - -my $tempfile; -if ($as_type ne "armasm") { - @gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd; -} else { - @preprocess_c_cmd = grep ! /^-c$/, @preprocess_c_cmd; - @preprocess_c_cmd = grep ! /^-m/, @preprocess_c_cmd; - - @preprocess_c_cmd = grep ! /^-G/, @preprocess_c_cmd; - @preprocess_c_cmd = grep ! /^-W/, @preprocess_c_cmd; - @preprocess_c_cmd = grep ! /^-Z/, @preprocess_c_cmd; - @preprocess_c_cmd = grep ! /^-fp/, @preprocess_c_cmd; - @preprocess_c_cmd = grep ! /^-EHsc$/, @preprocess_c_cmd; - @preprocess_c_cmd = grep ! /^-O/, @preprocess_c_cmd; - - @gcc_cmd = grep ! /^-G/, @gcc_cmd; - @gcc_cmd = grep ! /^-W/, @gcc_cmd; - @gcc_cmd = grep ! /^-Z/, @gcc_cmd; - @gcc_cmd = grep ! /^-fp/, @gcc_cmd; - @gcc_cmd = grep ! /^-EHsc$/, @gcc_cmd; - @gcc_cmd = grep ! /^-O/, @gcc_cmd; - - my @outfiles = grep /\.(o|obj)$/, @gcc_cmd; - $tempfile = $outfiles[0].".asm"; - - # Remove most parameters from gcc_cmd, which actually is the armasm command, - # which doesn't support any of the common compiler/preprocessor options. - @gcc_cmd = grep ! /^-D/, @gcc_cmd; - @gcc_cmd = grep ! /^-U/, @gcc_cmd; - @gcc_cmd = grep ! /^-m/, @gcc_cmd; - @gcc_cmd = grep ! /^-M/, @gcc_cmd; - @gcc_cmd = grep ! /^-c$/, @gcc_cmd; - @gcc_cmd = grep ! /^-I/, @gcc_cmd; - @gcc_cmd = map { /\.S$/ ? $tempfile : $_ } @gcc_cmd; -} - -# detect architecture from gcc binary name -if (!$arch) { - if ($gcc_cmd[0] =~ /(arm64|aarch64|arm|powerpc|ppc)/) { - $arch = $1; - } else { - # look for -arch flag - foreach my $i (1 .. $#gcc_cmd-1) { - if ($gcc_cmd[$i] eq "-arch" and - $gcc_cmd[$i+1] =~ /(arm64|aarch64|arm|powerpc|ppc)/) { - $arch = $1; - } - } - } -} - -# assume we're not cross-compiling if no -arch or the binary doesn't have the arch name -$arch = qx/arch/ if (!$arch); - -die "Unknown target architecture '$arch'" if not exists $canonical_arch{$arch}; - -$arch = $canonical_arch{$arch}; -$comm = $comments{$arch}; -my $inputcomm = $comm; -$comm = ";" if $as_type =~ /armasm/; - -my %ppc_spr = (ctr => 9, - vrsave => 256); - -open(INPUT, "-|", @preprocess_c_cmd) || die "Error running preprocessor"; - -if ($ENV{GASPP_DEBUG}) { - open(ASMFILE, ">&STDOUT"); -} else { - if ($as_type ne "armasm") { - open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler"; - } else { - open(ASMFILE, ">", $tempfile); - } -} - -my $current_macro = ''; -my $macro_level = 0; -my $rept_level = 0; -my %macro_lines; -my %macro_args; -my %macro_args_default; -my $macro_count = 0; -my $altmacro = 0; -my $in_irp = 0; - -my $num_repts; -my @rept_lines; - -my @irp_args; -my $irp_param; - -my @ifstack; - -my %symbols; - -my @sections; - -my %literal_labels; # for ldr <reg>, =<expr> -my $literal_num = 0; -my $literal_expr = ".word"; -$literal_expr = ".quad" if $arch eq "aarch64"; - -my $thumb = 0; - -my %thumb_labels; -my %call_targets; -my %import_symbols; - -my %neon_alias_reg; -my %neon_alias_type; - -my $temp_label_next = 0; -my %last_temp_labels; -my %next_temp_labels; - -my %labels_seen; - -my %aarch64_req_alias; - -if ($force_thumb) { - parse_line(".thumb\n"); -} - -# pass 1: parse .macro -# note that the handling of arguments is probably overly permissive vs. gas -# but it should be the same for valid cases -while (<INPUT>) { - # remove lines starting with '#', preprocessing is done, '#' at start of - # the line indicates a comment for all supported archs (aarch64, arm, ppc - # and x86). Also strips line number comments but since they are off anyway - # it is no loss. - s/^\s*#.*$//; - # remove all comments (to avoid interfering with evaluating directives) - s/(?<!\\)$inputcomm.*//x; - # Strip out windows linefeeds - s/\r$//; - - foreach my $subline (split(";", $_)) { - # Add newlines at the end of lines that don't already have one - chomp $subline; - $subline .= "\n"; - parse_line($subline); - } -} - -sub eval_expr { - my $expr = $_[0]; - while ($expr =~ /([A-Za-z._][A-Za-z0-9._]*)/g) { - my $sym = $1; - $expr =~ s/$sym/($symbols{$sym})/ if defined $symbols{$sym}; - } - eval $expr; -} - -sub handle_if { - my $line = $_[0]; - # handle .if directives; apple's assembler doesn't support important non-basic ones - # evaluating them is also needed to handle recursive macros - if ($line =~ /\.if(n?)([a-z]*)\s+(.*)/) { - my $result = $1 eq "n"; - my $type = $2; - my $expr = $3; - - if ($type eq "b") { - $expr =~ s/\s//g; - $result ^= $expr eq ""; - } elsif ($type eq "c") { - if ($expr =~ /(.*)\s*,\s*(.*)/) { - $result ^= $1 eq $2; - } else { - die "argument to .ifc not recognized"; - } - } elsif ($type eq "") { - $result ^= eval_expr($expr) != 0; - } elsif ($type eq "eq") { - $result = eval_expr($expr) == 0; - } elsif ($type eq "lt") { - $result = eval_expr($expr) < 0; - } else { - chomp($line); - die "unhandled .if varient. \"$line\""; - } - push (@ifstack, $result); - return 1; - } else { - return 0; - } -} - -sub parse_if_line { - my $line = $_[0]; - - # evaluate .if blocks - if (scalar(@ifstack)) { - # Don't evaluate any new if statements if we're within - # a repetition or macro - they will be evaluated once - # the repetition is unrolled or the macro is expanded. - if (scalar(@rept_lines) == 0 and $macro_level == 0) { - if ($line =~ /\.endif/) { - pop(@ifstack); - return 1; - } elsif ($line =~ /\.elseif\s+(.*)/) { - if ($ifstack[-1] == 0) { - $ifstack[-1] = !!eval_expr($1); - } elsif ($ifstack[-1] > 0) { - $ifstack[-1] = -$ifstack[-1]; - } - return 1; - } elsif ($line =~ /\.else/) { - $ifstack[-1] = !$ifstack[-1]; - return 1; - } elsif (handle_if($line)) { - return 1; - } - } - - # discard lines in false .if blocks - foreach my $i (0 .. $#ifstack) { - if ($ifstack[$i] <= 0) { - return 1; - } - } - } - return 0; -} - -sub parse_line { - my $line = $_[0]; - - return if (parse_if_line($line)); - - if (scalar(@rept_lines) == 0) { - if (/\.macro/) { - $macro_level++; - if ($macro_level > 1 && !$current_macro) { - die "nested macros but we don't have master macro"; - } - } elsif (/\.endm/) { - $macro_level--; - if ($macro_level < 0) { - die "unmatched .endm"; - } elsif ($macro_level == 0) { - $current_macro = ''; - return; - } - } - } - - if ($macro_level == 0) { - if ($line =~ /\.(rept|irp)/) { - $rept_level++; - } elsif ($line =~ /.endr/) { - $rept_level--; - } - } - - if ($macro_level > 1) { - push(@{$macro_lines{$current_macro}}, $line); - } elsif (scalar(@rept_lines) and $rept_level >= 1) { - push(@rept_lines, $line); - } elsif ($macro_level == 0) { - expand_macros($line); - } else { - if ($line =~ /\.macro\s+([\d\w\.]+)\s*,?\s*(.*)/) { - $current_macro = $1; - - # commas in the argument list are optional, so only use whitespace as the separator - my $arglist = $2; - $arglist =~ s/,/ /g; - - my @args = split(/\s+/, $arglist); - foreach my $i (0 .. $#args) { - my @argpair = split(/=/, $args[$i]); - $macro_args{$current_macro}[$i] = $argpair[0]; - $argpair[0] =~ s/:vararg$//; - $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1]; - } - # ensure %macro_lines has the macro name added as a key - $macro_lines{$current_macro} = []; - - } elsif ($current_macro) { - push(@{$macro_lines{$current_macro}}, $line); - } else { - die "macro level without a macro name"; - } - } -} - -sub handle_set { - my $line = $_[0]; - if ($line =~ /\.(?:set|equ)\s+(\S*)\s*,\s*(.*)/) { - $symbols{$1} = eval_expr($2); - return 1; - } - return 0; -} - -sub expand_macros { - my $line = $_[0]; - - # handle .if directives; apple's assembler doesn't support important non-basic ones - # evaluating them is also needed to handle recursive macros - if (handle_if($line)) { - return; - } - - if (/\.purgem\s+([\d\w\.]+)/) { - delete $macro_lines{$1}; - delete $macro_args{$1}; - delete $macro_args_default{$1}; - return; - } - - if ($line =~ /\.altmacro/) { - $altmacro = 1; - return; - } - - if ($line =~ /\.noaltmacro/) { - $altmacro = 0; - return; - } - - $line =~ s/\%([^,]*)/eval_expr($1)/eg if $altmacro; - - # Strip out the .set lines from the armasm output - return if (handle_set($line) and $as_type eq "armasm"); - - if ($line =~ /\.rept\s+(.*)/) { - $num_repts = $1; - @rept_lines = ("\n"); - - # handle the possibility of repeating another directive on the same line - # .endr on the same line is not valid, I don't know if a non-directive is - if ($num_repts =~ s/(\.\w+.*)//) { - push(@rept_lines, "$1\n"); - } - $num_repts = eval_expr($num_repts); - } elsif ($line =~ /\.irp\s+([\d\w\.]+)\s*(.*)/) { - $in_irp = 1; - $num_repts = 1; - @rept_lines = ("\n"); - $irp_param = $1; - - # only use whitespace as the separator - my $irp_arglist = $2; - $irp_arglist =~ s/,/ /g; - $irp_arglist =~ s/^\s+//; - @irp_args = split(/\s+/, $irp_arglist); - } elsif ($line =~ /\.irpc\s+([\d\w\.]+)\s*(.*)/) { - $in_irp = 1; - $num_repts = 1; - @rept_lines = ("\n"); - $irp_param = $1; - - my $irp_arglist = $2; - $irp_arglist =~ s/,/ /g; - $irp_arglist =~ s/^\s+//; - @irp_args = split(//, $irp_arglist); - } elsif ($line =~ /\.endr/) { - my @prev_rept_lines = @rept_lines; - my $prev_in_irp = $in_irp; - my @prev_irp_args = @irp_args; - my $prev_irp_param = $irp_param; - my $prev_num_repts = $num_repts; - @rept_lines = (); - $in_irp = 0; - @irp_args = ''; - - if ($prev_in_irp != 0) { - foreach my $i (@prev_irp_args) { - foreach my $origline (@prev_rept_lines) { - my $line = $origline; - $line =~ s/\\$prev_irp_param/$i/g; - $line =~ s/\\\(\)//g; # remove \() - parse_line($line); - } - } - } else { - for (1 .. $prev_num_repts) { - foreach my $origline (@prev_rept_lines) { - my $line = $origline; - parse_line($line); - } - } - } - } elsif ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) { - handle_serialized_line($1); - my $macro = $2; - - # commas are optional here too, but are syntactically important because - # parameters can be blank - my @arglist = split(/,/, $3); - my @args; - my @args_seperator; - - my $comma_sep_required = 0; - foreach (@arglist) { - # allow arithmetic/shift operators in macro arguments - $_ =~ s/\s*(\+|-|\*|\/|<<|>>|<|>)\s*/$1/g; - - my @whitespace_split = split(/\s+/, $_); - if (!@whitespace_split) { - push(@args, ''); - push(@args_seperator, ''); - } else { - foreach (@whitespace_split) { - #print ("arglist = \"$_\"\n"); - if (length($_)) { - push(@args, $_); - my $sep = $comma_sep_required ? "," : " "; - push(@args_seperator, $sep); - #print ("sep = \"$sep\", arg = \"$_\"\n"); - $comma_sep_required = 0; - } - } - } - - $comma_sep_required = 1; - } - - my %replacements; - if ($macro_args_default{$macro}){ - %replacements = %{$macro_args_default{$macro}}; - } - - # construct hashtable of text to replace - foreach my $i (0 .. $#args) { - my $argname = $macro_args{$macro}[$i]; - my @macro_args = @{ $macro_args{$macro} }; - if ($args[$i] =~ m/=/) { - # arg=val references the argument name - # XXX: I'm not sure what the expected behaviour if a lot of - # these are mixed with unnamed args - my @named_arg = split(/=/, $args[$i]); - $replacements{$named_arg[0]} = $named_arg[1]; - } elsif ($i > $#{$macro_args{$macro}}) { - # more args given than the macro has named args - # XXX: is vararg allowed on arguments before the last? - $argname = $macro_args{$macro}[-1]; - if ($argname =~ s/:vararg$//) { - #print "macro = $macro, args[$i] = $args[$i], args_seperator=@args_seperator, argname = $argname, arglist[$i] = $arglist[$i], arglist = @arglist, args=@args, macro_args=@macro_args\n"; - #$replacements{$argname} .= ", $args[$i]"; - $replacements{$argname} .= "$args_seperator[$i] $args[$i]"; - } else { - die "Too many arguments to macro $macro"; - } - } else { - $argname =~ s/:vararg$//; - $replacements{$argname} = $args[$i]; - } - } - - my $count = $macro_count++; - - # apply replacements as regex - foreach (@{$macro_lines{$macro}}) { - my $macro_line = $_; - # do replacements by longest first, this avoids wrong replacement - # when argument names are subsets of each other - foreach (reverse sort {length $a <=> length $b} keys %replacements) { - $macro_line =~ s/\\$_/$replacements{$_}/g; - } - if ($altmacro) { - foreach (reverse sort {length $a <=> length $b} keys %replacements) { - $macro_line =~ s/\b$_\b/$replacements{$_}/g; - } - } - $macro_line =~ s/\\\@/$count/g; - $macro_line =~ s/\\\(\)//g; # remove \() - parse_line($macro_line); - } - } else { - handle_serialized_line($line); - } -} - -sub is_arm_register { - my $name = $_[0]; - if ($name eq "lr" or - $name eq "ip" or - $name =~ /^[rav]\d+$/) { - return 1; - } - return 0; -} - -sub is_aarch64_register { - my $name = $_[0]; - if ($name =~ /^[xw]\d+$/) { - return 1; - } - return 0; -} - -sub handle_local_label { - my $line = $_[0]; - my $num = $_[1]; - my $dir = $_[2]; - my $target = "$num$dir"; - if ($dir eq "b") { - $line =~ s/\b$target\b/$last_temp_labels{$num}/g; - } else { - my $name = "temp_label_$temp_label_next"; - $temp_label_next++; - push(@{$next_temp_labels{$num}}, $name); - $line =~ s/\b$target\b/$name/g; - } - return $line; -} - -sub handle_serialized_line { - my $line = $_[0]; - - # handle .previous (only with regard to .section not .subsection) - if ($line =~ /\.(section|text|const_data)/) { - push(@sections, $line); - } elsif ($line =~ /\.previous/) { - if (!$sections[-2]) { - die ".previous without a previous section"; - } - $line = $sections[-2]; - push(@sections, $line); - } - - $thumb = 1 if $line =~ /\.code\s+16|\.thumb/; - $thumb = 0 if $line =~ /\.code\s+32|\.arm/; - - # handle ldr <reg>, =<expr> - if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/ and $as_type ne "armasm") { - my $label = $literal_labels{$3}; - if (!$label) { - $label = "Literal_$literal_num"; - $literal_num++; - $literal_labels{$3} = $label; - } - $line = "$1 ldr$2, $label\n"; - } elsif ($line =~ /\.ltorg/ and $as_type ne "armasm") { - $line .= ".align 2\n"; - foreach my $literal (keys %literal_labels) { - $line .= "$literal_labels{$literal}:\n $literal_expr $literal\n"; - } - %literal_labels = (); - } - - # handle GNU as pc-relative relocations for adrp/add - if ($line =~ /(.*)\s*adrp([\w\s\d]+)\s*,\s*#?:pg_hi21:([^\s]+)/ and $as_type =~ /^apple-/) { - $line = "$1 adrp$2, ${3}\@PAGE\n"; - } elsif ($line =~ /(.*)\s*add([\w\s\d]+)\s*,([\w\s\d]+)\s*,\s*#?:lo12:([^\s]+)/ and $as_type =~ /^apple-/) { - $line = "$1 add$2, $3, ${4}\@PAGEOFF\n"; - } - - # thumb add with large immediate needs explicit add.w - if ($thumb and $line =~ /add\s+.*#([^@]+)/) { - $line =~ s/add/add.w/ if eval_expr($1) > 255; - } - - # mach-o local symbol names start with L (no dot) - $line =~ s/(?<!\w)\.(L\w+)/$1/g; - - # recycle the '.func' directive for '.thumb_func' - if ($thumb and $as_type =~ /^apple-/) { - $line =~ s/\.func/.thumb_func/x; - } - - if ($thumb and $line =~ /^\s*(\w+)\s*:/) { - $thumb_labels{$1}++; - } - - if ($as_type =~ /^apple-/ and - $line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?|\.global)\s+(\w+)/) { - my $cond = $3; - my $label = $4; - # Don't interpret e.g. bic as b<cc> with ic as conditional code - if ($cond =~ /^(|$arm_cond_codes)$/) { - if (exists $thumb_labels{$label}) { - print ASMFILE ".thumb_func $label\n"; - } else { - $call_targets{$label}++; - } - } - } - - # @l -> lo16() @ha -> ha16() - $line =~ s/,\s+([^,]+)\@l\b/, lo16($1)/g; - $line =~ s/,\s+([^,]+)\@ha\b/, ha16($1)/g; - - # move to/from SPR - if ($line =~ /(\s+)(m[ft])([a-z]+)\s+(\w+)/ and exists $ppc_spr{$3}) { - if ($2 eq 'mt') { - $line = "$1${2}spr $ppc_spr{$3}, $4\n"; - } else { - $line = "$1${2}spr $4, $ppc_spr{$3}\n"; - } - } - - if ($line =~ /\.unreq\s+(.*)/) { - if (defined $neon_alias_reg{$1}) { - delete $neon_alias_reg{$1}; - delete $neon_alias_type{$1}; - return; - } elsif (defined $aarch64_req_alias{$1}) { - delete $aarch64_req_alias{$1}; - return; - } - } - # old gas versions store upper and lower case names on .req, - # but they remove only one on .unreq - if ($fix_unreq) { - if ($line =~ /\.unreq\s+(.*)/) { - $line = ".unreq " . lc($1) . "\n"; - $line .= ".unreq " . uc($1) . "\n"; - } - } - - if ($line =~ /(\w+)\s+\.(dn|qn)\s+(\w+)(?:\.(\w+))?(\[\d+\])?/) { - $neon_alias_reg{$1} = "$3$5"; - $neon_alias_type{$1} = $4; - return; - } - if (scalar keys %neon_alias_reg > 0 && $line =~ /^\s+v\w+/) { - # This line seems to possibly have a neon instruction - foreach (keys %neon_alias_reg) { - my $alias = $_; - # Require the register alias to match as an invididual word, not as a substring - # of a larger word-token. - if ($line =~ /\b$alias\b/) { - $line =~ s/\b$alias\b/$neon_alias_reg{$alias}/g; - # Add the type suffix. If multiple aliases match on the same line, - # only do this replacement the first time (a vfoo.bar string won't match v\w+). - $line =~ s/^(\s+)(v\w+)(\s+)/$1$2.$neon_alias_type{$alias}$3/; - } - } - } - - if ($arch eq "aarch64" or $as_type eq "armasm") { - # clang's integrated aarch64 assembler in Xcode 5 does not support .req/.unreq - if ($line =~ /\b(\w+)\s+\.req\s+(\w+)\b/) { - $aarch64_req_alias{$1} = $2; - return; - } - foreach (keys %aarch64_req_alias) { - my $alias = $_; - # recursively resolve aliases - my $resolved = $aarch64_req_alias{$alias}; - while (defined $aarch64_req_alias{$resolved}) { - $resolved = $aarch64_req_alias{$resolved}; - } - $line =~ s/\b$alias\b/$resolved/g; - } - } - if ($arch eq "aarch64") { - # fix missing aarch64 instructions in Xcode 5.1 (beta3) - # mov with vector arguments is not supported, use alias orr instead - if ($line =~ /^(\d+:)?\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) { - $line = "$1 orr $2, $3, $3\n"; - } - # movi 16, 32 bit shifted variant, shift is optional - if ($line =~ /^(\d+:)?\s*movi\s+(v[0-3]?\d\.(?:2|4|8)[hsHS])\s*,\s*(#\w+)\b\s*$/) { - $line = "$1 movi $2, $3, lsl #0\n"; - } - # Xcode 5 misses the alias uxtl. Replace it with the more general ushll. - # Clang 3.4 misses the alias sxtl too. Replace it with the more general sshll. - # armasm64 also misses these instructions. - if ($line =~ /^(\d+:)?\s*(s|u)xtl(2)?\s+(v[0-3]?\d\.[248][hsdHSD])\s*,\s*(v[0-3]?\d\.(?:2|4|8|16)[bhsBHS])\b\s*$/) { - $line = "$1 $2shll$3 $4, $5, #0\n"; - } - # clang 3.4 and armasm64 do not automatically use shifted immediates in add/sub - if (($as_type eq "clang" or $as_type eq "armasm") and - $line =~ /^(\d+:)?(\s*(?:add|sub)s?) ([^#l]+)#([\d\+\-\*\/ <>]+)\s*$/) { - my $imm = eval $4; - if ($imm > 4095 and not ($imm & 4095)) { - $line = "$1 $2 $3#" . ($imm >> 12) . ", lsl #12\n"; - } - } - if ($ENV{GASPP_FIX_XCODE5}) { - if ($line =~ /^\s*bsl\b/) { - $line =~ s/\b(bsl)(\s+v[0-3]?\d\.(\w+))\b/$1.$3$2/; - $line =~ s/\b(v[0-3]?\d)\.$3\b/$1/g; - } - if ($line =~ /^\s*saddl2?\b/) { - $line =~ s/\b(saddl2?)(\s+v[0-3]?\d\.(\w+))\b/$1.$3$2/; - $line =~ s/\b(v[0-3]?\d)\.\w+\b/$1/g; - } - if ($line =~ /^\s*dup\b.*\]$/) { - $line =~ s/\bdup(\s+v[0-3]?\d)\.(\w+)\b/dup.$2$1/g; - $line =~ s/\b(v[0-3]?\d)\.[bhsdBHSD](\[\d\])$/$1$2/g; - } - } - } - - if ($as_type eq "armasm") { - # Also replace variables set by .set - foreach (keys %symbols) { - my $sym = $_; - $line =~ s/\b$sym\b/$symbols{$sym}/g; - } - - # Handle function declarations and keep track of the declared labels - if ($line =~ s/^\s*\.func\s+(\w+)/$1 PROC/) { - $labels_seen{$1} = 1; - } - - if ($line =~ s/^\s*(\d+)://) { - # Convert local labels into unique labels. armasm (at least in - # RVCT) has something similar, but still different enough. - # By converting to unique labels we avoid any possible - # incompatibilities. - - my $num = $1; - foreach (@{$next_temp_labels{$num}}) { - $line = "$_\n" . $line; - } - @next_temp_labels{$num} = (); - my $name = "temp_label_$temp_label_next"; - $temp_label_next++; - # The matching regexp above removes the label from the start of - # the line (which might contain an instruction as well), readd - # it on a separate line above it. - $line = "$name:\n" . $line; - $last_temp_labels{$num} = $name; - } - - if ($line =~ s/^\s*(\w+):/$1/) { - # Skip labels that have already been declared with a PROC, - # labels must not be declared multiple times. - return if (defined $labels_seen{$1}); - $labels_seen{$1} = 1; - } elsif ($line !~ /(\w+) PROC/) { - # If not a label, make sure the line starts with whitespace, - # otherwise ms armasm interprets it incorrectly. - $line =~ s/^[\.\w]/\t$&/; - } - - - # Check branch instructions - if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?\.?(..)?(\.w)?)\s+(\w+)/) { - my $instr = $2; - my $cond = $3; - my $width = $4; - my $target = $5; - # Don't interpret e.g. bic as b<cc> with ic as conditional code - if ($cond !~ /^(|$arm_cond_codes)$/) { - # Not actually a branch - } elsif ($target =~ /^(\d+)([bf])$/) { - # The target is a local label - $line = handle_local_label($line, $1, $2); - $line =~ s/\b$instr\b/$&.w/ if $width eq "" and $arch eq "arm"; - } elsif (($arch eq "arm" and !is_arm_register($target)) or - ($arch eq "aarch64" and !is_aarch64_register($target))) { - $call_targets{$target}++; - } - } elsif ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(cbn?z|adr|tbz)\s+(\w+)\s*,(\s*#\d+\s*,)?\s*(\w+)/) { - my $instr = $2; - my $reg = $3; - my $bit = $4; - my $target = $5; - if ($target =~ /^(\d+)([bf])$/) { - # The target is a local label - $line = handle_local_label($line, $1, $2); - } else { - $call_targets{$target}++; - } - # Convert tbz with a wX register into an xX register, - # due to armasm64 bugs/limitations. - if ($instr eq "tbz" and $reg =~ /w\d+/) { - my $xreg = $reg; - $xreg =~ s/w/x/; - $line =~ s/\b$reg\b/$xreg/; - } - } elsif ($line =~ /^\s*.h?word.*\b\d+[bf]\b/) { - while ($line =~ /\b(\d+)([bf])\b/g) { - $line = handle_local_label($line, $1, $2); - } - } - - # ALIGN in armasm syntax is the actual number of bytes - if ($line =~ /\.(?:p2)?align\s+(\d+)/) { - my $align = 1 << $1; - $line =~ s/\.(?:p2)?align\s(\d+)/ALIGN $align/; - } - # Convert gas style [r0, :128] into armasm [r0@128] alignment specification - $line =~ s/\[([^\[,]+),?\s*:(\d+)\]/[$1\@$2]/g; - - # armasm treats logical values {TRUE} and {FALSE} separately from - # numeric values - logical operators and values can't be intermixed - # with numerical values. Evaluate !<number> and (a <> b) into numbers, - # let the assembler evaluate the rest of the expressions. This current - # only works for cases when ! and <> are used with actual constant numbers, - # we don't evaluate subexpressions here. - - # Evaluate !<number> - while ($line =~ /!\s*(\d+)/g) { - my $val = ($1 != 0) ? 0 : 1; - $line =~ s/!(\d+)/$val/; - } - # Evaluate (a > b) - while ($line =~ /\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/) { - my $val; - if ($2 eq "<") { - $val = ($1 < $3) ? 1 : 0; - } else { - $val = ($1 > $3) ? 1 : 0; - } - $line =~ s/\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/$val/; - } - - if ($arch eq "arm") { - # Change a movw... #:lower16: into a mov32 pseudoinstruction - $line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/; - # and remove the following, matching movt completely - $line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//; - - if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) { - $import_symbols{$1}++; - } - - # Misc bugs/deficiencies: - # armasm seems unable to parse e.g. "vmov s0, s1" without a type - # qualifier, thus add .f32. - $line =~ s/^(\s+(?:vmov|vadd))(\s+s\d+\s*,\s*s\d+)/$1.f32$2/; - } elsif ($arch eq "aarch64") { - # Convert ext into ext8; armasm64 seems to require it named as ext8. - $line =~ s/^(\s+)ext(\s+)/$1ext8$2/; - - # Pick up targets from ldr x0, =sym+offset - if ($line =~ /^\s*ldr\s+(\w+)\s*,\s*=([a-zA-Z]\w*)(.*)$/) { - my $reg = $1; - my $sym = $2; - my $offset = eval_expr($3); - if ($offset < 0) { - # armasm64 is buggy with ldr x0, =sym+offset where the - # offset is a negative value; it does write a negative - # offset into the literal pool as it should, but the - # negative offset only covers the lower 32 bit of the 64 - # bit literal/relocation. - # Thus remove the offset and apply it manually with a sub - # afterwards. - $offset = -$offset; - $line = "\tldr $reg, =$sym\n\tsub $reg, $reg, #$offset\n"; - } - $import_symbols{$sym}++; - } - - # armasm64 (currently) doesn't support offsets on adrp targets, - # even though the COFF format relocations (and the linker) - # supports it. Therefore strip out the offsets from adrp and - # add :lo12: (in case future armasm64 would start handling it) - # and add an extra explicit add instruction for the offset. - if ($line =~ s/(adrp\s+\w+\s*,\s*(\w+))([\d\+\-\*\/\(\) <>]+)?/\1/) { - $import_symbols{$2}++; - } - if ($line =~ s/(add\s+(\w+)\s*,\s*\w+\s*,\s*):lo12:(\w+)([\d\+\-\*\/\(\) <>]+)?/\1\3/) { - my $reg = $2; - my $sym = $3; - my $offset = eval_expr($4); - $line .= "\tadd $reg, $reg, #$offset\n" if $offset > 0; - $import_symbols{$sym}++; - } - - # Convert e.g. "add x0, x0, w0, uxtw" into "add x0, x0, w0, uxtw #0", - # or "ldr x0, [x0, w0, uxtw]" into "ldr x0, [x0, w0, uxtw #0]". - $line =~ s/(uxtw|sxtw)(\s*\]?\s*)$/\1 #0\2/i; - - # Convert "mov x0, v0.d[0]" into "umov x0, v0.d[0]" - $line =~ s/\bmov\s+[xw]\d+\s*,\s*v\d+\.[ds]/u$&/i; - - # Convert "ccmp w0, #0, #0, ne" into "ccmpne w0, #0, #0", - # and "csel w0, w0, w0, ne" into "cselne w0, w0, w0". - $line =~ s/(ccmp|csel)\s+([xw]\w+)\s*,\s*([xw#]\w+)\s*,\s*([xw#]\w+)\s*,\s*($arm_cond_codes)/\1\5 \2, \3, \4/; - - # Convert "cinc w0, w0, ne" into "cincne w0, w0". - $line =~ s/(cinc)\s+([xw]\w+)\s*,\s*([xw]\w+)\s*,\s*($arm_cond_codes)/\1\4 \2, \3/; - - # Convert "cset w0, lo" into "csetlo w0" - $line =~ s/(cset)\s+([xw]\w+)\s*,\s*($arm_cond_codes)/\1\3 \2/; - - # Strip out prfum; armasm64 fails to assemble any - # variant/combination of prfum tested so far, but it can be - # left out without any - $line =~ s/prfum.*\]//; - - # Convert "ldrb w0, [x0, #-1]" into "ldurb w0, [x0, #-1]". - # Don't do this for forms with writeback though. - if ($line =~ /(ld|st)(r[bh]?)\s+(\w+)\s*,\s*\[\s*(\w+)\s*,\s*#([^\]]+)\s*\][^!]/) { - my $instr = $1; - my $suffix = $2; - my $target = $3; - my $base = $4; - my $offset = eval_expr($5); - if ($offset < 0) { - $line =~ s/$instr$suffix/${instr}u$suffix/; - } - } - - if ($ENV{GASPP_ARMASM64_INVERT_SCALE}) { - # Instructions like fcvtzs and scvtf store the scale value - # inverted in the opcode (stored as 64 - scale), but armasm64 - # in early versions stores it as-is. Thus convert from - # "fcvtzs w0, s0, #8" into "fcvtzs w0, s0, #56". - if ($line =~ /(?:fcvtzs|scvtf)\s+(\w+)\s*,\s*(\w+)\s*,\s*#(\d+)/) { - my $scale = $3; - my $inverted_scale = 64 - $3; - $line =~ s/#$scale/#$inverted_scale/; - } - } - } - # armasm is unable to parse &0x - add spacing - $line =~ s/&0x/& 0x/g; - } - - if ($force_thumb) { - # Convert register post indexing to a separate add instruction. - # This converts e.g. "ldr r0, [r1], r2" into "ldr r0, [r1]", - # "add r1, r1, r2". - $line =~ s/((?:ldr|str)[bh]?)\s+(\w+),\s*\[(\w+)\],\s*(\w+)/$1 $2, [$3]\n\tadd $3, $3, $4/g; - - # Convert "mov pc, lr" into "bx lr", since the former only works - # for switching from arm to thumb (and only in armv7), but not - # from thumb to arm. - $line =~ s/mov\s*pc\s*,\s*lr/bx lr/g; - - # Convert stmdb/ldmia/stmfd/ldmfd/ldm with only one register into a plain str/ldr with post-increment/decrement. - # Wide thumb2 encoding requires at least two registers in register list while all other encodings support one register too. - $line =~ s/stm(?:db|fd)\s+sp!\s*,\s*\{([^,-]+)\}/str $1, [sp, #-4]!/g; - $line =~ s/ldm(?:ia|fd)?\s+sp!\s*,\s*\{([^,-]+)\}/ldr $1, [sp], #4/g; - - # Convert muls into mul+cmp - $line =~ s/muls\s+(\w+),\s*(\w+)\,\s*(\w+)/mul $1, $2, $3\n\tcmp $1, #0/g; - - # Convert "and r0, sp, #xx" into "mov r0, sp", "and r0, r0, #xx" - $line =~ s/and\s+(\w+),\s*(sp|r13)\,\s*#(\w+)/mov $1, $2\n\tand $1, $1, #$3/g; - - # Convert "ldr r0, [r0, r1, lsl #6]" where the shift is >3 (which - # can't be handled in thumb) into "add r0, r0, r1, lsl #6", - # "ldr r0, [r0]", for the special case where the same address is - # used as base and target for the ldr. - if ($line =~ /(ldr[bh]?)\s+(\w+),\s*\[\2,\s*(\w+),\s*lsl\s*#(\w+)\]/ and $4 > 3) { - $line =~ s/(ldr[bh]?)\s+(\w+),\s*\[\2,\s*(\w+),\s*lsl\s*#(\w+)\]/add $2, $2, $3, lsl #$4\n\t$1 $2, [$2]/; - } - - $line =~ s/\.arm/.thumb/x; - } - - # comment out unsupported directives - $line =~ s/\.type/$comm$&/x if $as_type =~ /^(apple-|armasm)/; - $line =~ s/\.func/$comm$&/x if $as_type =~ /^(apple-|clang)/; - $line =~ s/\.endfunc/$comm$&/x if $as_type =~ /^(apple-|clang)/; - $line =~ s/\.endfunc/ENDP/x if $as_type =~ /armasm/; - $line =~ s/\.ltorg/$comm$&/x if $as_type =~ /^(apple-|clang)/; - $line =~ s/\.ltorg/LTORG/x if $as_type eq "armasm"; - $line =~ s/\.size/$comm$&/x if $as_type =~ /^(apple-|armasm)/; - $line =~ s/\.fpu/$comm$&/x if $as_type =~ /^(apple-|armasm)/; - $line =~ s/\.arch/$comm$&/x if $as_type =~ /^(apple-|clang|armasm)/; - $line =~ s/\.object_arch/$comm$&/x if $as_type =~ /^(apple-|armasm)/; - $line =~ s/.section\s+.note.GNU-stack.*/$comm$&/x if $as_type =~ /^(apple-|armasm)/; - - $line =~ s/\.syntax/$comm$&/x if $as_type =~ /armasm/; - - $line =~ s/\.hword/.short/x; - - if ($as_type =~ /^apple-/) { - # the syntax for these is a little different - $line =~ s/\.global/.globl/x; - # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const - $line =~ s/(.*)\.rodata/.const_data/x; - $line =~ s/\.int/.long/x; - $line =~ s/\.float/.single/x; - } - if ($as_type eq "apple-gas") { - $line =~ s/vmrs\s+APSR_nzcv/fmrx r15/x; - } - if ($as_type eq "armasm") { - $line =~ s/\.global/EXPORT/x; - $line =~ s/\.int/dcd/x; - $line =~ s/\.long/dcd/x; - $line =~ s/\.float/dcfs/x; - $line =~ s/\.word/dcd/x; - $line =~ s/\.short/dcw/x; - $line =~ s/\.byte/dcb/x; - $line =~ s/\.quad/dcq/x; - $line =~ s/\.ascii/dcb/x; - $line =~ s/\.asciz(.*)$/dcb\1,0/x; - $line =~ s/\.thumb/THUMB/x; - $line =~ s/\.arm/ARM/x; - # The alignment in AREA is the power of two, just as .align in gas - $line =~ s/\.text/AREA |.text|, CODE, READONLY, ALIGN=4, CODEALIGN/; - $line =~ s/(\s*)(.*)\.rodata/$1AREA |.rodata|, DATA, READONLY, ALIGN=5/; - $line =~ s/\.data/AREA |.data|, DATA, ALIGN=5/; - } - if ($as_type eq "armasm" and $arch eq "arm") { - $line =~ s/fmxr/vmsr/; - $line =~ s/fmrx/vmrs/; - $line =~ s/fadds/vadd.f32/; - } - if ($as_type eq "armasm" and $arch eq "aarch64") { - # Convert "b.eq" into "beq" - $line =~ s/\bb\.($arm_cond_codes)\b/b\1/; - } - - # catch unknown section names that aren't mach-o style (with a comma) - if ($as_type =~ /apple-/ and $line =~ /.section ([^,]*)$/) { - die ".section $1 unsupported; figure out the mach-o section name and add it"; - } - - print ASMFILE $line; -} - -if ($as_type ne "armasm") { - print ASMFILE ".text\n"; - print ASMFILE ".align 2\n"; - foreach my $literal (keys %literal_labels) { - print ASMFILE "$literal_labels{$literal}:\n $literal_expr $literal\n"; - } - - map print(ASMFILE ".thumb_func $_\n"), - grep exists $thumb_labels{$_}, keys %call_targets; -} else { - map print(ASMFILE "\tIMPORT $_\n"), - grep ! exists $labels_seen{$_}, (keys %call_targets, keys %import_symbols); - - print ASMFILE "\tEND\n"; -} - -close(INPUT) or exit 1; -close(ASMFILE) or exit 1; -if ($as_type eq "armasm" and ! defined $ENV{GASPP_DEBUG}) { - system(@gcc_cmd) == 0 or die "Error running assembler"; -} - -END { - unlink($tempfile) if defined $tempfile; -} -#exit 1 |