Support converting arm assembly to armasm syntax

Message ID 1394623482-28193-1-git-send-email-martin@martin.st
State Superseded
Headers show

Commit Message

Martin Storsjö March 12, 2014, 11:24 a.m.
This syntax is supported by the official arm tools and
by Microsoft's assembler.

This currently only supports microsoft's assembler, the
armasm assembler in RVCT requires a few more tweaks to
be able to build libav.

The preprocessing is done by invoking cpp (do we need to
be able to override this?).

The converted output is written to a file instead of using
a pipe, since Microsoft's armasm can't read the input from
a pipe.
---
 gas-preprocessor.pl | 274 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 245 insertions(+), 29 deletions(-)

Comments

Janne Grunau March 12, 2014, 8:02 p.m. | #1
On 2014-03-12 13:24:42 +0200, Martin Storsjö wrote:
> This syntax is supported by the official arm tools and
> by Microsoft's assembler.
> 
> This currently only supports microsoft's assembler, the
> armasm assembler in RVCT requires a few more tweaks to
> be able to build libav.
> 
> The preprocessing is done by invoking cpp (do we need to
> be able to override this?).
> 
> The converted output is written to a file instead of using
> a pipe, since Microsoft's armasm can't read the input from
> a pipe.

Does the name still make sense? I guess it preprocesses
modern gas asm files so that other assemblers can use them.

If anyone has a good idea how the support for different
target assemblers could be made more modular without requiring
all the ifs scattered through the source it would be welcome.

I looked only lightly at the armasm specific parts, looks ok-ish
aside from my comments below.

Janne

> ---
>  gas-preprocessor.pl | 274 ++++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 245 insertions(+), 29 deletions(-)
> 
> diff --git a/gas-preprocessor.pl b/gas-preprocessor.pl
> index a8f119a..50b5de9 100755
> --- a/gas-preprocessor.pl
> +++ b/gas-preprocessor.pl
> @@ -40,7 +40,7 @@ command. Following options are currently supported:
>  
>      -help         - this usage text
>      -arch         - target architecture
> -    -as-type      - one value out of {,apple-}{gas,clang}
> +    -as-type      - one value out of {{,apple-}{gas,clang},armasm}
>      -fix-unreq
>      -no-fix-unreq
>  ";
> @@ -79,7 +79,7 @@ while (@options) {
>          die "unkown arch: '$arch'\n" if not exists $comments{$arch};
>      } elsif ($opt eq "-as-type") {
>          $as_type = shift @options;
> -        die "unkown as type: '$as_type'\n" if $as_type !~ /^(apple-)?(gas|clang)$/;
> +        die "unkown as type: '$as_type'\n" if $as_type !~ /^((apple-)?(gas|clang)|armasm)$/;
>      } elsif ($opt eq "-help") {
>          usage();
>          exit 0;
> @@ -103,6 +103,25 @@ if (grep /\.c$/, @gcc_cmd) {
>  } else {
>      die "Unrecognized input filetype";
>  }
> +if ($as_type eq "armasm") {
> +
> +    $preprocess_c_cmd[0] = "cpp";
> +
> +    @preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd;
> +    # Remove -ignore XX parameter pairs from preprocess_c_cmd
> +    my $index = 1;
> +    while ($index < $#preprocess_c_cmd) {
> +        if ($preprocess_c_cmd[$index] eq "-ignore" and $index + 1 < $#preprocess_c_cmd) {
> +            splice(@preprocess_c_cmd, $index, 2);
> +            next;
> +        }
> +        $index++;
> +    }
> +    if (grep /^-MM$/, @preprocess_c_cmd) {
> +        system(@preprocess_c_cmd) == 0 or die "Error running preprocessor";
> +        exit 0;
> +    }
> +}
>  
>  # if compiling, avoid creating an output file named '-.o'
>  if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
> @@ -116,8 +135,27 @@ if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
>          }
>      }
>  }
> -@gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
>  @preprocess_c_cmd = map { /\.o$/ ? "-" : $_ } @preprocess_c_cmd;
> +my $tempfile;
> +if ($as_type ne "armasm") {
> +    @gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
> +} else {
> +    @preprocess_c_cmd = grep ! /^-c$/, @preprocess_c_cmd;
> +    @preprocess_c_cmd = grep ! /^-m/, @preprocess_c_cmd;
> +
> +    my @outfiles = grep /\.o$/, @gcc_cmd;
> +    $tempfile = $outfiles[0].".asm";
> +
> +    # Remove most parameters from gcc_cmd, which actually is the armasm command,
> +    # which doesn't support any of the common compiler/preprocessor options.
> +    @gcc_cmd = grep ! /^-D/, @gcc_cmd;
> +    @gcc_cmd = grep ! /^-U/, @gcc_cmd;
> +    @gcc_cmd = grep ! /^-m/, @gcc_cmd;
> +    @gcc_cmd = grep ! /^-M/, @gcc_cmd;
> +    @gcc_cmd = grep ! /^-c$/, @gcc_cmd;
> +    @gcc_cmd = grep ! /^-I/, @gcc_cmd;
> +    @gcc_cmd = map { /\.S$/ ? $tempfile : $_ } @gcc_cmd;
> +}
>  
>  # detect architecture from gcc binary name
>  if (!$arch) {
> @@ -167,23 +205,53 @@ my %symbols;
>  while (<ASMFILE>) {
>      # remove all comments (to avoid interfering with evaluating directives)
>      s/(?<!\\)$comm.*//x;
> +    # Strip out windows linefeeds
> +    s/\r$//;
> +    # Strip out line number comments - armasm can handle them in a separate
> +    # syntax, but since the line numbers are off they are only misleading.
> +    s/^#\s+(\d+).*//          if $as_type =~ /armasm/;
>  
>      # comment out unsupported directives
> -    s/\.type/$comm$&/x        if $as_type =~ /^apple-/;
> +    s/\.type/$comm$&/x        if $as_type =~ /^(apple-|armasm)/;
>      s/\.func/$comm$&/x        if $as_type =~ /^(apple-|clang)/;
>      s/\.endfunc/$comm$&/x     if $as_type =~ /^(apple-|clang)/;
> -    s/\.ltorg/$comm$&/x       if $as_type =~ /^(apple-|clang)/;
> -    s/\.size/$comm$&/x        if $as_type =~ /^apple-/;
> -    s/\.fpu/$comm$&/x         if $as_type =~ /^apple-/;
> -    s/\.arch/$comm$&/x        if $as_type =~ /^(apple-|clang)/;
> -    s/\.object_arch/$comm$&/x if $as_type =~ /^apple-/;
> -
> -    # the syntax for these is a little different
> -    s/\.global/.globl/x       if $as_type =~ /apple-/;
> -    # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
> -    s/(.*)\.rodata/.const_data/x if $as_type =~ /apple-/;
> -    s/\.int/.long/x;
> -    s/\.float/.single/x;
> +    s/\.endfunc/ENDP/x        if $as_type =~ /armasm/;
> +    s/\.ltorg/$comm$&/x       if $as_type =~ /^(apple-|clang|armasm)/;
> +    s/\.size/$comm$&/x        if $as_type =~ /^(apple-|armasm)/;
> +    s/\.fpu/$comm$&/x         if $as_type =~ /^(apple-|armasm)/;
> +    s/\.arch/$comm$&/x        if $as_type =~ /^(apple-|clang|armasm)/;
> +    s/\.object_arch/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
> +
> +    s/\.syntax/$comm$&/x      if $as_type =~ /armasm/;
> +    s/\.thumb/THUMB/x         if $as_type =~ /armasm/;
> +    s/\.arm/ARM/x             if $as_type =~ /armasm/;

we use .arm|.thumb in the 2nd pass to set $thumb and that is not
modified, does thumb work with armasm?

Also, if you have ideas on how the as_type based commenting could be
improved, let me know. I wasn't really happy with it when I wrote it.

> +    # armasm uses a different comment character. We don't want to change
> +    # $comm originally since that matches what the input source uses.
> +    s/$comm/;/                if $as_type =~ /armasm/;
> +
> +    if ($as_type ne "armasm") {

I think this could be $as_type =~ /apple-/; there is no need to replace .int and .float
with modern gas and upstream clang. I didn't make them conditional since both
support .long/.single too.

> +        # the syntax for these is a little different
> +        s/\.global/.globl/x       if $as_type =~ /apple-/;
> +        # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
> +        s/(.*)\.rodata/.const_data/x if $as_type =~ /apple-/;
> +        s/\.int/.long/x;
> +        s/\.float/.single/x;
> +    } else {
> +        s/\.global/EXPORT/x;
> +        s/\.int/dcd/x;
> +        s/\.long/dcd/x;
> +        s/\.float/dcfs/x;
> +        s/\.word/dcd/x;
> +        s/\.short/dcw/x;
> +        s/\.byte/dcb/x;
> +        # The alignment in AREA is the power of two, just as .align in gas
> +        s/\.text/AREA |.text|, CODE, READONLY, ALIGN=2, CODEALIGN/;
> +        s/(.*)\.rodata/AREA |.rodata|, DATA, READONLY, ALIGN=5/;
> +
> +        s/fmxr/vmsr/;
> +        s/fmrx/vmrs/;
> +        s/fadds/vadd/;
> +    }
>  
>      # catch unknown section names that aren't mach-o style (with a comma)
>      if ($as_type =~ /apple-/ and /.section ([^,]*)$/) {
> @@ -327,7 +395,9 @@ sub handle_set {
>      my $line = $_[0];
>      if ($line =~ /\.set\s+(.*),\s*(.*)/) {
>          $symbols{$1} = eval_expr($2);
> +        return 1;
>      }
> +    return 0;
>  }
>  
>  sub expand_macros {
> @@ -450,7 +520,11 @@ close(ASMFILE) or exit 1;
>  if ($ENV{GASPP_DEBUG}) {
>      open(ASMFILE, ">&STDOUT");
>  } else {
> -    open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
> +    if ($as_type ne "armasm") {
> +        open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
> +    } else {
> +        open(ASMFILE, ">", $tempfile);
> +    }
>  }
>  
>  my @sections;
> @@ -466,6 +540,7 @@ my $thumb = 0;
>  
>  my %thumb_labels;
>  my %call_targets;
> +my %mov32_targets;
>  
>  my @irp_args;
>  my $irp_param;
> @@ -473,6 +548,12 @@ my $irp_param;
>  my %neon_alias_reg;
>  my %neon_alias_type;
>  
> +my $temp_label_next = 0;
> +my %last_temp_labels;
> +my %next_temp_labels;
> +
> +my %labels_seen;
> +
>  my %aarch64_req_alias;
>  
>  # pass 2: parse .rept and .if variants
> @@ -492,7 +573,7 @@ foreach my $line (@pass1_lines) {
>      $thumb = 0 if $line =~ /\.code\s+32|\.arm/;
>  
>      # handle ldr <reg>, =<expr>
> -    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/) {
> +    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/ and $as_type ne "armasm") {
>          my $label = $literal_labels{$3};
>          if (!$label) {
>              $label = "Literal_$literal_num";
> @@ -500,7 +581,7 @@ foreach my $line (@pass1_lines) {
>              $literal_labels{$3} = $label;
>          }
>          $line = "$1 ldr$2, $label\n";
> -    } elsif ($line =~ /\.ltorg/) {
> +    } elsif ($line =~ /\.ltorg/ and $as_type ne "armasm") {
>          $line .= ".align 2\n";
>          foreach my $literal (keys %literal_labels) {
>              $line .= "$literal_labels{$literal}:\n $literal_expr $literal\n";
> @@ -533,7 +614,7 @@ foreach my $line (@pass1_lines) {
>      }
>  
>      if ($line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?|\.globl)\s+(\w+)/ and
> -	$as_type ne "gas") {
> +	$as_type !~ "gas|armasm") {

this also excludes apple-gas

>          my $cond = $3;
>          my $label = $4;
>          # Don't interpret e.g. bic as b<cc> with ic as conditional code
> @@ -627,7 +708,8 @@ sub handle_serialized_line {
>          return if handle_if($line);
>      }
>  
> -    handle_set($line);
> +    # Strip out the .set lines from the armasm output
> +    return if (handle_set($line) and $as_type eq "armasm");
>  
>      if ($line =~ /\.unreq\s+(.*)/) {
>          if (defined $neon_alias_reg{$1}) {
> @@ -668,7 +750,7 @@ sub handle_serialized_line {
>          }
>      }
>  
> -    if ($arch eq "aarch64") {
> +    if ($arch eq "aarch64" or $as_type eq "armasm") {
>          # clang's integrated aarch64 assembler in Xcode 5 does not support .req/.unreq
>          if ($line =~ /\b(\w+)\s+\.req\s+(\w+)\b/) {
>              $aarch64_req_alias{$1} = $2;
> @@ -683,6 +765,8 @@ sub handle_serialized_line {
>              }
>              $line =~ s/\b$alias\b/$resolved/g;
>          }
> +    }
> +    if ($arch eq "aarch64") {
>          # fix missing aarch64 instructions in Xcode 5.1 (beta3)
>          # mov with vector arguments is not supported, use alias orr instead
>          if ($line =~ /^\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) {
> @@ -708,17 +792,149 @@ sub handle_serialized_line {
>          }
>      }
>  
> +    if ($as_type eq "armasm") {
> +        # Also replace variables set by .set
> +        foreach (keys %symbols) {
> +            my $sym = $_;
> +            $line =~ s/\b$sym\b/$symbols{$sym}/g;
> +        }
> +
> +        # Handle function declarations and keep track of the declared labels
> +        if ($line =~ s/^\s*\.func\s+(\w+)/$1 PROC/) {
> +            $labels_seen{$1} = 1;
> +        }
> +
> +        if ($line =~ s/^(\d+)://) {
> +            # Convert local labels into unique labels. armasm (at least in
> +            # RVCT) has something similar, but still different enough.
> +            # By converting to unique labels we avoid any possible
> +            # incompatibilities.
> +
> +            my $num = $1;
> +            foreach (@{$next_temp_labels{$num}}) {
> +                $line = "$_\n" . $line;
> +            }
> +            @next_temp_labels{$num} = ();
> +            my $name = "temp_label_$temp_label_next";
> +            $temp_label_next++;
> +            # The matching regexp above removes the label from the start of
> +            # the line (which might contain an instruction as well), readd
> +            # it on a separate line above it.
> +            $line = "$name:\n" . $line;
> +            $last_temp_labels{$num} = $name;
> +        }
> +
> +        if ($line =~ s/^(\w+):/$1/) {
> +            # Skip labels that have already been declared with a PROC,
> +            # labels must not be declared multiple times.
> +            return if (defined $labels_seen{$1});
> +            $labels_seen{$1} = 1;
> +        } elsif ($line !~ /(\w+) PROC/) {
> +            # If not a label, make sure the line starts with whitespace,
> +            # otherwise ms armasm interprets it incorrectly.
> +            $line =~ s/^[\.\w]/\t$&/;
> +        }
> +
> +
> +        # Check branch instructions
> +        if ($line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?)\s+(\w+)/) {
> +            my $cond = $3;
> +            my $target = $4;
> +            # Don't interpret e.g. bic as b<cc> with ic as conditional code
> +            if ($cond !~ /|eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|hs|lo/) {
> +                # Not actually a branch
> +            } elsif ($target =~ /(\d+)([bf])/) {
> +                # The target is a local label
> +                my $num = $1;
> +                my $dir = $2;
> +                if ($dir eq "b") {
> +                    $line =~ s/$target/$last_temp_labels{$num}/;
> +                } else {
> +                    my $name = "temp_label_$temp_label_next";
> +                    $temp_label_next++;
> +                    push(@{$next_temp_labels{$num}}, $name);
> +                    $line =~ s/$target/$name/;
> +                }
> +            } elsif ($target ne "lr" and
> +                     $target ne "ip" and
> +                     $target !~ /^[rav]\d+$/) {
> +                $call_targets{$target}++;
> +            }
> +        }
> +
> +        # ALIGN in armasm syntax is the actual number of bytes
> +        if ($line =~ /\.align\s+(\d+)/) {
> +            my $align = 1 << $1;
> +            $line =~ s/\.align\s(\d+)/ALIGN $align/;
> +        }
> +        # Convert gas style [r0, :128] into armasm [r0@128] alignment specification
> +        $line =~ s/\[([^\[]+),\s*:(\d+)\]/[$1\@$2]/g;
> +
> +        # armasm treats logical values {TRUE} and {FALSE} separately from
> +        # numeric values - logical operators and values can't be intermixed
> +        # with numerical values. Evaluate !<number> and (a <> b) into numbers,
> +        # let the assembler evaluate the rest of the expressions. This current
> +        # only works for cases when ! and <> are used with actual constant numbers,
> +        # we don't evaluate subexpressions here.
> +
> +        # Evaluate !<number>
> +        while ($line =~ /!\s*(\d+)/g) {
> +            my $val = ($1 != 0) ? 0 : 1;
> +            $line =~ s/!(\d+)/$val/;
> +        }
> +        # Evaluate (a > b)
> +        while ($line =~ /\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/) {
> +            my $val;
> +            if ($2 eq "<") {
> +                $val = ($1 < $3) ? 1 : 0;
> +            } else {
> +                $val = ($1 > $3) ? 1 : 0;
> +            }
> +            $line =~ s/\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/$val/;
> +        }
> +
> +        # Change a movw... #:lower16: into a mov32 pseudoinstruction
> +        $line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/;
> +        # and remove the following, matching movt completely
> +        $line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//;
> +
> +        if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) {
> +            $mov32_targets{$1}++;
> +        }
> +
> +        # Misc bugs/deficiencies:
> +        # armasm seems unable to parse e.g. "vmov s0, s1" without a type
> +        # qualifier, thus add .f32.
> +        $line =~ s/^(\s+(?:vmov|vadd))(\s+s)/$1.f32$2/;
> +        # armasm is unable to parse &0x - add spacing
> +        $line =~ s/&0x/& 0x/g;
> +    }
> +
>      print ASMFILE $line;
>  }
>  
> -print ASMFILE ".text\n";
> -print ASMFILE ".align 2\n";
> -foreach my $literal (keys %literal_labels) {
> -    print ASMFILE "$literal_labels{$literal}:\n $literal_expr $literal\n";
> -}
> +if ($as_type ne "armasm") {
> +    print ASMFILE ".text\n";
> +    print ASMFILE ".align 2\n";
> +    foreach my $literal (keys %literal_labels) {
> +        print ASMFILE "$literal_labels{$literal}:\n $literal_expr $literal\n";
> +    }
> +
> +    map print(ASMFILE ".thumb_func $_\n"),
> +        grep exists $thumb_labels{$_}, keys %call_targets;
> +} else {
> +    map print(ASMFILE "\tIMPORT $_\n"),
> +        grep ! exists $labels_seen{$_}, (keys %call_targets, keys %mov32_targets);
>  
> -map print(ASMFILE ".thumb_func $_\n"),
> -    grep exists $thumb_labels{$_}, keys %call_targets;
> +    print ASMFILE "\tEND\n";
> +}
>  
>  close(ASMFILE) or exit 1;
> +if ($as_type eq "armasm" and ! defined $ENV{GASPP_DEBUG}) {
> +    system(@gcc_cmd) == 0 or die "Error running assembler";
> +}
> +
> +END {
> +    unlink($tempfile) if defined $tempfile;
> +}
>  #exit 1
> -- 
> 1.8.1.2
> 
> _______________________________________________
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel
Martin Storsjö March 12, 2014, 8:41 p.m. | #2
On Wed, 12 Mar 2014, Janne Grunau wrote:

> On 2014-03-12 13:24:42 +0200, Martin Storsjö wrote:
>> This syntax is supported by the official arm tools and
>> by Microsoft's assembler.
>>
>> This currently only supports microsoft's assembler, the
>> armasm assembler in RVCT requires a few more tweaks to
>> be able to build libav.
>>
>> The preprocessing is done by invoking cpp (do we need to
>> be able to override this?).
>>
>> The converted output is written to a file instead of using
>> a pipe, since Microsoft's armasm can't read the input from
>> a pipe.
>
> Does the name still make sense? I guess it preprocesses
> modern gas asm files so that other assemblers can use them.

I guess it still makes sense, I don't have any better suggestions, 
especially none that would be worth the hassle of changing.

> If anyone has a good idea how the support for different
> target assemblers could be made more modular without requiring
> all the ifs scattered through the source it would be welcome.

It might possibly be simplified a little by moving the directive 
reformatting/commenting out to the final stage of the preprocessing (so 
that most of it deals with the original gas syntax), and adding a bunch of 
flags for which parts of the conversion machinery to enable - so we don't 
need to check for e.g. aarch64 || armasm, but only check whether .req 
alias expansion should be done. I haven't thought it through to the end 
though how much could be gained by this.

> I looked only lightly at the armasm specific parts, looks ok-ish
> aside from my comments below.
>
> Janne
>
>> ---
>>  gas-preprocessor.pl | 274 ++++++++++++++++++++++++++++++++++++++++++++++------
>>  1 file changed, 245 insertions(+), 29 deletions(-)
>>
>> diff --git a/gas-preprocessor.pl b/gas-preprocessor.pl
>> index a8f119a..50b5de9 100755
>> --- a/gas-preprocessor.pl
>> +++ b/gas-preprocessor.pl
>> @@ -40,7 +40,7 @@ command. Following options are currently supported:
>>
>>      -help         - this usage text
>>      -arch         - target architecture
>> -    -as-type      - one value out of {,apple-}{gas,clang}
>> +    -as-type      - one value out of {{,apple-}{gas,clang},armasm}
>>      -fix-unreq
>>      -no-fix-unreq
>>  ";
>> @@ -79,7 +79,7 @@ while (@options) {
>>          die "unkown arch: '$arch'\n" if not exists $comments{$arch};
>>      } elsif ($opt eq "-as-type") {
>>          $as_type = shift @options;
>> -        die "unkown as type: '$as_type'\n" if $as_type !~ /^(apple-)?(gas|clang)$/;
>> +        die "unkown as type: '$as_type'\n" if $as_type !~ /^((apple-)?(gas|clang)|armasm)$/;
>>      } elsif ($opt eq "-help") {
>>          usage();
>>          exit 0;
>> @@ -103,6 +103,25 @@ if (grep /\.c$/, @gcc_cmd) {
>>  } else {
>>      die "Unrecognized input filetype";
>>  }
>> +if ($as_type eq "armasm") {
>> +
>> +    $preprocess_c_cmd[0] = "cpp";
>> +
>> +    @preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd;
>> +    # Remove -ignore XX parameter pairs from preprocess_c_cmd
>> +    my $index = 1;
>> +    while ($index < $#preprocess_c_cmd) {
>> +        if ($preprocess_c_cmd[$index] eq "-ignore" and $index + 1 < $#preprocess_c_cmd) {
>> +            splice(@preprocess_c_cmd, $index, 2);
>> +            next;
>> +        }
>> +        $index++;
>> +    }
>> +    if (grep /^-MM$/, @preprocess_c_cmd) {
>> +        system(@preprocess_c_cmd) == 0 or die "Error running preprocessor";
>> +        exit 0;
>> +    }
>> +}
>>
>>  # if compiling, avoid creating an output file named '-.o'
>>  if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
>> @@ -116,8 +135,27 @@ if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
>>          }
>>      }
>>  }
>> -@gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
>>  @preprocess_c_cmd = map { /\.o$/ ? "-" : $_ } @preprocess_c_cmd;
>> +my $tempfile;
>> +if ($as_type ne "armasm") {
>> +    @gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
>> +} else {
>> +    @preprocess_c_cmd = grep ! /^-c$/, @preprocess_c_cmd;
>> +    @preprocess_c_cmd = grep ! /^-m/, @preprocess_c_cmd;
>> +
>> +    my @outfiles = grep /\.o$/, @gcc_cmd;
>> +    $tempfile = $outfiles[0].".asm";
>> +
>> +    # Remove most parameters from gcc_cmd, which actually is the armasm command,
>> +    # which doesn't support any of the common compiler/preprocessor options.
>> +    @gcc_cmd = grep ! /^-D/, @gcc_cmd;
>> +    @gcc_cmd = grep ! /^-U/, @gcc_cmd;
>> +    @gcc_cmd = grep ! /^-m/, @gcc_cmd;
>> +    @gcc_cmd = grep ! /^-M/, @gcc_cmd;
>> +    @gcc_cmd = grep ! /^-c$/, @gcc_cmd;
>> +    @gcc_cmd = grep ! /^-I/, @gcc_cmd;
>> +    @gcc_cmd = map { /\.S$/ ? $tempfile : $_ } @gcc_cmd;
>> +}
>>
>>  # detect architecture from gcc binary name
>>  if (!$arch) {
>> @@ -167,23 +205,53 @@ my %symbols;
>>  while (<ASMFILE>) {
>>      # remove all comments (to avoid interfering with evaluating directives)
>>      s/(?<!\\)$comm.*//x;
>> +    # Strip out windows linefeeds
>> +    s/\r$//;
>> +    # Strip out line number comments - armasm can handle them in a separate
>> +    # syntax, but since the line numbers are off they are only misleading.
>> +    s/^#\s+(\d+).*//          if $as_type =~ /armasm/;
>>
>>      # comment out unsupported directives
>> -    s/\.type/$comm$&/x        if $as_type =~ /^apple-/;
>> +    s/\.type/$comm$&/x        if $as_type =~ /^(apple-|armasm)/;
>>      s/\.func/$comm$&/x        if $as_type =~ /^(apple-|clang)/;
>>      s/\.endfunc/$comm$&/x     if $as_type =~ /^(apple-|clang)/;
>> -    s/\.ltorg/$comm$&/x       if $as_type =~ /^(apple-|clang)/;
>> -    s/\.size/$comm$&/x        if $as_type =~ /^apple-/;
>> -    s/\.fpu/$comm$&/x         if $as_type =~ /^apple-/;
>> -    s/\.arch/$comm$&/x        if $as_type =~ /^(apple-|clang)/;
>> -    s/\.object_arch/$comm$&/x if $as_type =~ /^apple-/;
>> -
>> -    # the syntax for these is a little different
>> -    s/\.global/.globl/x       if $as_type =~ /apple-/;
>> -    # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
>> -    s/(.*)\.rodata/.const_data/x if $as_type =~ /apple-/;
>> -    s/\.int/.long/x;
>> -    s/\.float/.single/x;
>> +    s/\.endfunc/ENDP/x        if $as_type =~ /armasm/;
>> +    s/\.ltorg/$comm$&/x       if $as_type =~ /^(apple-|clang|armasm)/;
>> +    s/\.size/$comm$&/x        if $as_type =~ /^(apple-|armasm)/;
>> +    s/\.fpu/$comm$&/x         if $as_type =~ /^(apple-|armasm)/;
>> +    s/\.arch/$comm$&/x        if $as_type =~ /^(apple-|clang|armasm)/;
>> +    s/\.object_arch/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
>> +
>> +    s/\.syntax/$comm$&/x      if $as_type =~ /armasm/;
>> +    s/\.thumb/THUMB/x         if $as_type =~ /armasm/;
>> +    s/\.arm/ARM/x             if $as_type =~ /armasm/;
>
> we use .arm|.thumb in the 2nd pass to set $thumb and that is not
> modified, does thumb work with armasm?

Yes, thumb works just fine there. In fact, with windows phone (and WinRT I 
guess as well) one is supposed to build everything in thumb mode, afaik 
the linker is missing some feature to have mixing of arm and thumb working 
properly. (Their compiler only produces thumb, and if linking in arm 
routines it occasionally works and occasionally fails.)

You're right that we fail to set $thumb = 1 for these cases though - the 
only thing that flag is used for is for adding the .thumb_func stuff 
(which shouldn't be used on armasm anyway), and for changing add into 
add.w for large immediates (which doesn't seem to be necessary).

What's your preferred way of solving this? Moving the directive 
reformatting from .thumb to THUMB etc (including all the other directive 
reformatting) to a stage later than the one that checks for .thumb/.code 
16 would probably be a quite big refactoring. The dirty way is changing 
that check into looking for ARM/THUMB for armasm, but that's not too 
nice...

> Also if you have ideas how the as_type based commenting could be
> improved. I wasn't really happy with it when I wrote it.

You mean how the ... if $as_type =~ are added at the end of the line? This 
solution is kinda acceptable I think; the alternative would be to have it 
grouped up in larger if clauses for the different backends.

>> +    # armasm uses a different comment character. We don't want to change
>> +    # $comm originally since that matches what the input source uses.
>> +    s/$comm/;/                if $as_type =~ /armasm/;
>> +
>> +    if ($as_type ne "armasm") {
>
> I think this could be $as_type =~ /apple-/, no need to replace .int and .float
> with modern gas and upstream clang, I didn't make them conditional since both
> support .long/.single too

Right, I'll fix that.

>> +        # the syntax for these is a little different
>> +        s/\.global/.globl/x       if $as_type =~ /apple-/;
>> +        # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
>> +        s/(.*)\.rodata/.const_data/x if $as_type =~ /apple-/;
>> +        s/\.int/.long/x;
>> +        s/\.float/.single/x;
>> +    } else {
>> +        s/\.global/EXPORT/x;
>> +        s/\.int/dcd/x;
>> +        s/\.long/dcd/x;
>> +        s/\.float/dcfs/x;
>> +        s/\.word/dcd/x;
>> +        s/\.short/dcw/x;
>> +        s/\.byte/dcb/x;
>> +        # The alignment in AREA is the power of two, just as .align in gas
>> +        s/\.text/AREA |.text|, CODE, READONLY, ALIGN=2, CODEALIGN/;
>> +        s/(.*)\.rodata/AREA |.rodata|, DATA, READONLY, ALIGN=5/;
>> +
>> +        s/fmxr/vmsr/;
>> +        s/fmrx/vmrs/;
>> +        s/fadds/vadd/;
>> +    }
>>
>>      # catch unknown section names that aren't mach-o style (with a comma)
>>      if ($as_type =~ /apple-/ and /.section ([^,]*)$/) {
>> @@ -327,7 +395,9 @@ sub handle_set {
>>      my $line = $_[0];
>>      if ($line =~ /\.set\s+(.*),\s*(.*)/) {
>>          $symbols{$1} = eval_expr($2);
>> +        return 1;
>>      }
>> +    return 0;
>>  }
>>
>>  sub expand_macros {
>> @@ -450,7 +520,11 @@ close(ASMFILE) or exit 1;
>>  if ($ENV{GASPP_DEBUG}) {
>>      open(ASMFILE, ">&STDOUT");
>>  } else {
>> -    open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
>> +    if ($as_type ne "armasm") {
>> +        open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
>> +    } else {
>> +        open(ASMFILE, ">", $tempfile);
>> +    }
>>  }
>>
>>  my @sections;
>> @@ -466,6 +540,7 @@ my $thumb = 0;
>>
>>  my %thumb_labels;
>>  my %call_targets;
>> +my %mov32_targets;
>>
>>  my @irp_args;
>>  my $irp_param;
>> @@ -473,6 +548,12 @@ my $irp_param;
>>  my %neon_alias_reg;
>>  my %neon_alias_type;
>>
>> +my $temp_label_next = 0;
>> +my %last_temp_labels;
>> +my %next_temp_labels;
>> +
>> +my %labels_seen;
>> +
>>  my %aarch64_req_alias;
>>
>>  # pass 2: parse .rept and .if variants
>> @@ -492,7 +573,7 @@ foreach my $line (@pass1_lines) {
>>      $thumb = 0 if $line =~ /\.code\s+32|\.arm/;
>>
>>      # handle ldr <reg>, =<expr>
>> -    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/) {
>> +    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/ and $as_type ne "armasm") {
>>          my $label = $literal_labels{$3};
>>          if (!$label) {
>>              $label = "Literal_$literal_num";
>> @@ -500,7 +581,7 @@ foreach my $line (@pass1_lines) {
>>              $literal_labels{$3} = $label;
>>          }
>>          $line = "$1 ldr$2, $label\n";
>> -    } elsif ($line =~ /\.ltorg/) {
>> +    } elsif ($line =~ /\.ltorg/ and $as_type ne "armasm") {
>>          $line .= ".align 2\n";
>>          foreach my $literal (keys %literal_labels) {
>>              $line .= "$literal_labels{$literal}:\n $literal_expr $literal\n";
>> @@ -533,7 +614,7 @@ foreach my $line (@pass1_lines) {
>>      }
>>
>>      if ($line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?|\.globl)\s+(\w+)/ and
>> -	$as_type ne "gas") {
>> +	$as_type !~ "gas|armasm") {
>
> this also excludes apple-gas

Thanks for noting it, will fix

// Martin
Martin Storsjö March 12, 2014, 8:48 p.m. | #3
On Wed, 12 Mar 2014, Martin Storsjö wrote:

> On Wed, 12 Mar 2014, Janne Grunau wrote:
>
>> On 2014-03-12 13:24:42 +0200, Martin Storsjö wrote:
>>> @@ -533,7 +614,7 @@ foreach my $line (@pass1_lines) {
>>>      }
>>>
>>>      if ($line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?|\.globl)\s+(\w+)/ 
>>> and
>>> -	$as_type ne "gas") {
>>> +	$as_type !~ "gas|armasm") {
>> 
>> this also excludes apple-gas
>
> Thanks for noting it, will fix

Actually, isn't most of this .thumb_func stuff apple specific? That is, we 
could change it into =~ /^apple-/ instead.

// Martin
Niels Möller March 13, 2014, 8:31 a.m. | #4
Janne Grunau <janne-libav@jannau.net> writes:

> If anyone has a good idea how the support for different
> target assemblers could be made more modular without requiring
> all the ifs scattered through the source it would be welcome.

I'm afraid this is not very helpful for refactoring gas-preprocessor.pl,
but I'll jump in nonetheless.

In GMP and Nettle, assembly files are preprocessed using m4 (I know some
people prefer cpp, but imho, it's a mistake to use cpp for preprocessing
anything but C source code, it's too tied to C tokenization. And some
advanced m4 features like diversions also come in handy when generating
pic references on some platforms). And then the definitions of some m4
macros depend on configure checks.

The configure-related m4 macros are not mainly there to support a lot of
different assemblers; they're mostly used for ABI-dependent issues, like
how to do pic references, label prefixes, and the different x86_64
calling conventions between windows and everything else. But in some
places they're used also to work around assembler bugs, like generating
certain instructions as .byte directives.

Regards,
/Niels
Janne Grunau March 16, 2014, 7:47 p.m. | #5
On 2014-03-12 22:48:49 +0200, Martin Storsjö wrote:
> On Wed, 12 Mar 2014, Martin Storsjö wrote:
> 
> >On Wed, 12 Mar 2014, Janne Grunau wrote:
> >
> >>On 2014-03-12 13:24:42 +0200, Martin Storsjö wrote:
> >>>@@ -533,7 +614,7 @@ foreach my $line (@pass1_lines) {
> >>>     }
> >>>
> >>>     if ($line =~
> >>>/^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?|\.globl)\s+(\w+)/ and
> >>>-	$as_type ne "gas") {
> >>>+	$as_type !~ "gas|armasm") {
> >>
> >>this also excludes apple-gas
> >
> >Thanks for noting it, will fix
> 
> Actually, isn't most of this .thumb_func stuff apple specific? That
> is, we could change it into =~ /^apple-/ instead.

yes, it is. I didn't test armv7 clang with thumb. that's broken now
since it doesn't know .thumb_func

Janne

Patch

diff --git a/gas-preprocessor.pl b/gas-preprocessor.pl
index a8f119a..50b5de9 100755
--- a/gas-preprocessor.pl
+++ b/gas-preprocessor.pl
@@ -40,7 +40,7 @@  command. Following options are currently supported:
 
     -help         - this usage text
     -arch         - target architecture
-    -as-type      - one value out of {,apple-}{gas,clang}
+    -as-type      - one value out of {{,apple-}{gas,clang},armasm}
     -fix-unreq
     -no-fix-unreq
 ";
@@ -79,7 +79,7 @@  while (@options) {
         die "unkown arch: '$arch'\n" if not exists $comments{$arch};
     } elsif ($opt eq "-as-type") {
         $as_type = shift @options;
-        die "unkown as type: '$as_type'\n" if $as_type !~ /^(apple-)?(gas|clang)$/;
+        die "unkown as type: '$as_type'\n" if $as_type !~ /^((apple-)?(gas|clang)|armasm)$/;
     } elsif ($opt eq "-help") {
         usage();
         exit 0;
@@ -103,6 +103,25 @@  if (grep /\.c$/, @gcc_cmd) {
 } else {
     die "Unrecognized input filetype";
 }
+if ($as_type eq "armasm") {
+
+    $preprocess_c_cmd[0] = "cpp";
+
+    @preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd;
+    # Remove -ignore XX parameter pairs from preprocess_c_cmd
+    my $index = 1;
+    while ($index < $#preprocess_c_cmd) {
+        if ($preprocess_c_cmd[$index] eq "-ignore" and $index + 1 < $#preprocess_c_cmd) {
+            splice(@preprocess_c_cmd, $index, 2);
+            next;
+        }
+        $index++;
+    }
+    if (grep /^-MM$/, @preprocess_c_cmd) {
+        system(@preprocess_c_cmd) == 0 or die "Error running preprocessor";
+        exit 0;
+    }
+}
 
 # if compiling, avoid creating an output file named '-.o'
 if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
@@ -116,8 +135,27 @@  if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
         }
     }
 }
-@gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
 @preprocess_c_cmd = map { /\.o$/ ? "-" : $_ } @preprocess_c_cmd;
+my $tempfile;
+if ($as_type ne "armasm") {
+    @gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
+} else {
+    @preprocess_c_cmd = grep ! /^-c$/, @preprocess_c_cmd;
+    @preprocess_c_cmd = grep ! /^-m/, @preprocess_c_cmd;
+
+    my @outfiles = grep /\.o$/, @gcc_cmd;
+    $tempfile = $outfiles[0].".asm";
+
+    # Remove most parameters from gcc_cmd, which actually is the armasm command,
+    # which doesn't support any of the common compiler/preprocessor options.
+    @gcc_cmd = grep ! /^-D/, @gcc_cmd;
+    @gcc_cmd = grep ! /^-U/, @gcc_cmd;
+    @gcc_cmd = grep ! /^-m/, @gcc_cmd;
+    @gcc_cmd = grep ! /^-M/, @gcc_cmd;
+    @gcc_cmd = grep ! /^-c$/, @gcc_cmd;
+    @gcc_cmd = grep ! /^-I/, @gcc_cmd;
+    @gcc_cmd = map { /\.S$/ ? $tempfile : $_ } @gcc_cmd;
+}
 
 # detect architecture from gcc binary name
 if (!$arch) {
@@ -167,23 +205,53 @@  my %symbols;
 while (<ASMFILE>) {
     # remove all comments (to avoid interfering with evaluating directives)
     s/(?<!\\)$comm.*//x;
+    # Strip out windows linefeeds
+    s/\r$//;
+    # Strip out line number comments - armasm can handle them in a separate
+    # syntax, but since the line numbers are off they are only misleading.
+    s/^#\s+(\d+).*//          if $as_type =~ /armasm/;
 
     # comment out unsupported directives
-    s/\.type/$comm$&/x        if $as_type =~ /^apple-/;
+    s/\.type/$comm$&/x        if $as_type =~ /^(apple-|armasm)/;
     s/\.func/$comm$&/x        if $as_type =~ /^(apple-|clang)/;
     s/\.endfunc/$comm$&/x     if $as_type =~ /^(apple-|clang)/;
-    s/\.ltorg/$comm$&/x       if $as_type =~ /^(apple-|clang)/;
-    s/\.size/$comm$&/x        if $as_type =~ /^apple-/;
-    s/\.fpu/$comm$&/x         if $as_type =~ /^apple-/;
-    s/\.arch/$comm$&/x        if $as_type =~ /^(apple-|clang)/;
-    s/\.object_arch/$comm$&/x if $as_type =~ /^apple-/;
-
-    # the syntax for these is a little different
-    s/\.global/.globl/x       if $as_type =~ /apple-/;
-    # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
-    s/(.*)\.rodata/.const_data/x if $as_type =~ /apple-/;
-    s/\.int/.long/x;
-    s/\.float/.single/x;
+    s/\.endfunc/ENDP/x        if $as_type =~ /armasm/;
+    s/\.ltorg/$comm$&/x       if $as_type =~ /^(apple-|clang|armasm)/;
+    s/\.size/$comm$&/x        if $as_type =~ /^(apple-|armasm)/;
+    s/\.fpu/$comm$&/x         if $as_type =~ /^(apple-|armasm)/;
+    s/\.arch/$comm$&/x        if $as_type =~ /^(apple-|clang|armasm)/;
+    s/\.object_arch/$comm$&/x if $as_type =~ /^(apple-|armasm)/;
+
+    s/\.syntax/$comm$&/x      if $as_type =~ /armasm/;
+    s/\.thumb/THUMB/x         if $as_type =~ /armasm/;
+    s/\.arm/ARM/x             if $as_type =~ /armasm/;
+    # armasm uses a different comment character. We don't want to change
+    # $comm originally since that matches what the input source uses.
+    s/$comm/;/                if $as_type =~ /armasm/;
+
+    if ($as_type ne "armasm") {
+        # the syntax for these is a little different
+        s/\.global/.globl/x       if $as_type =~ /apple-/;
+        # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
+        s/(.*)\.rodata/.const_data/x if $as_type =~ /apple-/;
+        s/\.int/.long/x;
+        s/\.float/.single/x;
+    } else {
+        s/\.global/EXPORT/x;
+        s/\.int/dcd/x;
+        s/\.long/dcd/x;
+        s/\.float/dcfs/x;
+        s/\.word/dcd/x;
+        s/\.short/dcw/x;
+        s/\.byte/dcb/x;
+        # The alignment in AREA is the power of two, just as .align in gas
+        s/\.text/AREA |.text|, CODE, READONLY, ALIGN=2, CODEALIGN/;
+        s/(.*)\.rodata/AREA |.rodata|, DATA, READONLY, ALIGN=5/;
+
+        s/fmxr/vmsr/;
+        s/fmrx/vmrs/;
+        s/fadds/vadd/;
+    }
 
     # catch unknown section names that aren't mach-o style (with a comma)
     if ($as_type =~ /apple-/ and /.section ([^,]*)$/) {
@@ -327,7 +395,9 @@  sub handle_set {
     my $line = $_[0];
     if ($line =~ /\.set\s+(.*),\s*(.*)/) {
         $symbols{$1} = eval_expr($2);
+        return 1;
     }
+    return 0;
 }
 
 sub expand_macros {
@@ -450,7 +520,11 @@  close(ASMFILE) or exit 1;
 if ($ENV{GASPP_DEBUG}) {
     open(ASMFILE, ">&STDOUT");
 } else {
-    open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
+    if ($as_type ne "armasm") {
+        open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
+    } else {
+        open(ASMFILE, ">", $tempfile);
+    }
 }
 
 my @sections;
@@ -466,6 +540,7 @@  my $thumb = 0;
 
 my %thumb_labels;
 my %call_targets;
+my %mov32_targets;
 
 my @irp_args;
 my $irp_param;
@@ -473,6 +548,12 @@  my $irp_param;
 my %neon_alias_reg;
 my %neon_alias_type;
 
+my $temp_label_next = 0;
+my %last_temp_labels;
+my %next_temp_labels;
+
+my %labels_seen;
+
 my %aarch64_req_alias;
 
 # pass 2: parse .rept and .if variants
@@ -492,7 +573,7 @@  foreach my $line (@pass1_lines) {
     $thumb = 0 if $line =~ /\.code\s+32|\.arm/;
 
     # handle ldr <reg>, =<expr>
-    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/) {
+    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/ and $as_type ne "armasm") {
         my $label = $literal_labels{$3};
         if (!$label) {
             $label = "Literal_$literal_num";
@@ -500,7 +581,7 @@  foreach my $line (@pass1_lines) {
             $literal_labels{$3} = $label;
         }
         $line = "$1 ldr$2, $label\n";
-    } elsif ($line =~ /\.ltorg/) {
+    } elsif ($line =~ /\.ltorg/ and $as_type ne "armasm") {
         $line .= ".align 2\n";
         foreach my $literal (keys %literal_labels) {
             $line .= "$literal_labels{$literal}:\n $literal_expr $literal\n";
@@ -533,7 +614,7 @@  foreach my $line (@pass1_lines) {
     }
 
     if ($line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?|\.globl)\s+(\w+)/ and
-	$as_type ne "gas") {
+	$as_type !~ "gas|armasm") {
         my $cond = $3;
         my $label = $4;
         # Don't interpret e.g. bic as b<cc> with ic as conditional code
@@ -627,7 +708,8 @@  sub handle_serialized_line {
         return if handle_if($line);
     }
 
-    handle_set($line);
+    # Strip out the .set lines from the armasm output
+    return if (handle_set($line) and $as_type eq "armasm");
 
     if ($line =~ /\.unreq\s+(.*)/) {
         if (defined $neon_alias_reg{$1}) {
@@ -668,7 +750,7 @@  sub handle_serialized_line {
         }
     }
 
-    if ($arch eq "aarch64") {
+    if ($arch eq "aarch64" or $as_type eq "armasm") {
         # clang's integrated aarch64 assembler in Xcode 5 does not support .req/.unreq
         if ($line =~ /\b(\w+)\s+\.req\s+(\w+)\b/) {
             $aarch64_req_alias{$1} = $2;
@@ -683,6 +765,8 @@  sub handle_serialized_line {
             }
             $line =~ s/\b$alias\b/$resolved/g;
         }
+    }
+    if ($arch eq "aarch64") {
         # fix missing aarch64 instructions in Xcode 5.1 (beta3)
         # mov with vector arguments is not supported, use alias orr instead
         if ($line =~ /^\s*mov\s+(v\d[\.{}\[\]\w]+),\s*(v\d[\.{}\[\]\w]+)\b\s*$/) {
@@ -708,17 +792,149 @@  sub handle_serialized_line {
         }
     }
 
+    if ($as_type eq "armasm") {
+        # Also replace variables set by .set
+        foreach (keys %symbols) {
+            my $sym = $_;
+            $line =~ s/\b$sym\b/$symbols{$sym}/g;
+        }
+
+        # Handle function declarations and keep track of the declared labels
+        if ($line =~ s/^\s*\.func\s+(\w+)/$1 PROC/) {
+            $labels_seen{$1} = 1;
+        }
+
+        if ($line =~ s/^(\d+)://) {
+            # Convert local labels into unique labels. armasm (at least in
+            # RVCT) has something similar, but still different enough.
+            # By converting to unique labels we avoid any possible
+            # incompatibilities.
+
+            my $num = $1;
+            foreach (@{$next_temp_labels{$num}}) {
+                $line = "$_\n" . $line;
+            }
+            @next_temp_labels{$num} = ();
+            my $name = "temp_label_$temp_label_next";
+            $temp_label_next++;
+            # The matching regexp above removes the label from the start of
+            # the line (which might contain an instruction as well), readd
+            # it on a separate line above it.
+            $line = "$name:\n" . $line;
+            $last_temp_labels{$num} = $name;
+        }
+
+        if ($line =~ s/^(\w+):/$1/) {
+            # Skip labels that have already been declared with a PROC,
+            # labels must not be declared multiple times.
+            return if (defined $labels_seen{$1});
+            $labels_seen{$1} = 1;
+        } elsif ($line !~ /(\w+) PROC/) {
+            # If not a label, make sure the line starts with whitespace,
+            # otherwise ms armasm interprets it incorrectly.
+            $line =~ s/^[\.\w]/\t$&/;
+        }
+
+
+        # Check branch instructions
+        if ($line =~ /^\s*((\w+\s*:\s*)?bl?x?(..)?(?:\.w)?)\s+(\w+)/) {
+            my $cond = $3;
+            my $target = $4;
+            # Don't interpret e.g. bic as b<cc> with ic as conditional code
+            if ($cond !~ /|eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|hs|lo/) {
+                # Not actually a branch
+            } elsif ($target =~ /(\d+)([bf])/) {
+                # The target is a local label
+                my $num = $1;
+                my $dir = $2;
+                if ($dir eq "b") {
+                    $line =~ s/$target/$last_temp_labels{$num}/;
+                } else {
+                    my $name = "temp_label_$temp_label_next";
+                    $temp_label_next++;
+                    push(@{$next_temp_labels{$num}}, $name);
+                    $line =~ s/$target/$name/;
+                }
+            } elsif ($target ne "lr" and
+                     $target ne "ip" and
+                     $target !~ /^[rav]\d+$/) {
+                $call_targets{$target}++;
+            }
+        }
+
+        # ALIGN in armasm syntax is the actual number of bytes
+        if ($line =~ /\.align\s+(\d+)/) {
+            my $align = 1 << $1;
+            $line =~ s/\.align\s(\d+)/ALIGN $align/;
+        }
+        # Convert gas style [r0, :128] into armasm [r0@128] alignment specification
+        $line =~ s/\[([^\[]+),\s*:(\d+)\]/[$1\@$2]/g;
+
+        # armasm treats logical values {TRUE} and {FALSE} separately from
+        # numeric values - logical operators and values can't be intermixed
+        # with numerical values. Evaluate !<number> and (a <> b) into numbers,
+        # let the assembler evaluate the rest of the expressions. This current
+        # only works for cases when ! and <> are used with actual constant numbers,
+        # we don't evaluate subexpressions here.
+
+        # Evaluate !<number>
+        while ($line =~ /!\s*(\d+)/g) {
+            my $val = ($1 != 0) ? 0 : 1;
+            $line =~ s/!(\d+)/$val/;
+        }
+        # Evaluate (a > b)
+        while ($line =~ /\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/) {
+            my $val;
+            if ($2 eq "<") {
+                $val = ($1 < $3) ? 1 : 0;
+            } else {
+                $val = ($1 > $3) ? 1 : 0;
+            }
+            $line =~ s/\(\s*(\d+)\s*([<>])\s*(\d+)\s*\)/$val/;
+        }
+
+        # Change a movw... #:lower16: into a mov32 pseudoinstruction
+        $line =~ s/^(\s*)movw(\s+\w+\s*,\s*)\#:lower16:(.*)$/$1mov32$2$3/;
+        # and remove the following, matching movt completely
+        $line =~ s/^\s*movt\s+\w+\s*,\s*\#:upper16:.*$//;
+
+        if ($line =~ /^\s*mov32\s+\w+,\s*([a-zA-Z]\w*)/) {
+            $mov32_targets{$1}++;
+        }
+
+        # Misc bugs/deficiencies:
+        # armasm seems unable to parse e.g. "vmov s0, s1" without a type
+        # qualifier, thus add .f32.
+        $line =~ s/^(\s+(?:vmov|vadd))(\s+s)/$1.f32$2/;
+        # armasm is unable to parse &0x - add spacing
+        $line =~ s/&0x/& 0x/g;
+    }
+
     print ASMFILE $line;
 }
 
-print ASMFILE ".text\n";
-print ASMFILE ".align 2\n";
-foreach my $literal (keys %literal_labels) {
-    print ASMFILE "$literal_labels{$literal}:\n $literal_expr $literal\n";
-}
+if ($as_type ne "armasm") {
+    print ASMFILE ".text\n";
+    print ASMFILE ".align 2\n";
+    foreach my $literal (keys %literal_labels) {
+        print ASMFILE "$literal_labels{$literal}:\n $literal_expr $literal\n";
+    }
+
+    map print(ASMFILE ".thumb_func $_\n"),
+        grep exists $thumb_labels{$_}, keys %call_targets;
+} else {
+    map print(ASMFILE "\tIMPORT $_\n"),
+        grep ! exists $labels_seen{$_}, (keys %call_targets, keys %mov32_targets);
 
-map print(ASMFILE ".thumb_func $_\n"),
-    grep exists $thumb_labels{$_}, keys %call_targets;
+    print ASMFILE "\tEND\n";
+}
 
 close(ASMFILE) or exit 1;
+if ($as_type eq "armasm" and ! defined $ENV{GASPP_DEBUG}) {
+    system(@gcc_cmd) == 0 or die "Error running assembler";
+}
+
+END {
+    unlink($tempfile) if defined $tempfile;
+}
 #exit 1