Assembly

#25
TIOBE#19
GitHub#29
IEEESpectrum#33
programming language, low-level language, system programming, embedded development, performance optimization, machine language

Programming Language

Assembly

Overview

Assembly language is a low-level programming language that represents CPU machine instructions in a human-readable form.

Details

Assembly language is a low-level programming language that allows writing CPU machine instructions in a human-understandable form using mnemonics. Each CPU architecture (x86, ARM, MIPS, RISC-V, etc.) has its own unique instruction set and syntax, enabling programming at the closest level to hardware. It provides direct memory management, register manipulation, and hardware control capabilities, achieving the highest level of performance and precise control. It is used in embedded systems, device drivers, operating system kernels, real-time systems, security research, and reverse engineering. In modern development, it's often used for optimizing critical parts of high-level languages or when utilizing specific hardware features, with inline assembly combined with C/C++ being widely utilized.

Code Examples

Hello World (x86-64 Linux)

# AT&T syntax (GAS) -- x86-64 Linux, raw system calls, no libc
    .equ SYS_WRITE, 1               # syscall number loaded into %rax for write
    .equ SYS_EXIT,  60              # syscall number loaded into %rax for exit
    .equ STDOUT,    1               # file descriptor the message is written to

.section .rodata                    # the message is only ever read
msg:
    .ascii "Hello, World!\n"
    .equ msg_len, . - msg           # byte length, computed at assembly time

.section .text
    .global _start

# Program entry point: prints msg to stdout, then exits with status 0.
_start:
    # write(STDOUT, msg, msg_len)
    movl    $SYS_WRITE, %eax        # writing %eax zero-extends into %rax
    movl    $STDOUT, %edi
    leaq    msg(%rip), %rsi         # RIP-relative address: also links as PIE
    movl    $msg_len, %edx
    syscall

    # exit(0)
    movl    $SYS_EXIT, %eax
    xorl    %edi, %edi              # exit code 0
    syscall
; Intel syntax (NASM) -- x86-64 Linux, raw system calls, no libc
SYS_WRITE   equ 1                   ; syscall number loaded into rax for write
SYS_EXIT    equ 60                  ; syscall number loaded into rax for exit
STDOUT      equ 1                   ; file descriptor the message is written to

section .rodata                     ; the message is only ever read
    msg     db 'Hello, World!', 0xA
    msg_len equ $ - msg             ; byte length, computed at assembly time

section .text
    global _start

; Program entry point: prints msg to stdout, then exits with status 0.
_start:
    ; write(STDOUT, msg, msg_len)
    mov     eax, SYS_WRITE          ; writing eax zero-extends into rax
    mov     edi, STDOUT
    lea     rsi, [rel msg]          ; RIP-relative address: also links as PIE
    mov     edx, msg_len
    syscall

    ; exit(0)
    mov     eax, SYS_EXIT
    xor     edi, edi                ; exit code 0
    syscall

Basic Data Operations

# x86-64 AT&T syntax (GAS), Linux
    .equ SYS_EXIT, 60                   # syscall number used to terminate

.section .data
number1:    .quad 10
number2:    .quad 20
result:     .quad 0

.section .text
    .global _start

# Entry point: walks through add, subtract, multiply and bitwise
# instructions on the two 64-bit operands above, then exits with status 0.
_start:
    # --- Addition: result = number1 + number2 ---
    movq    number1(%rip), %rax
    movq    number2(%rip), %rbx
    addq    %rbx, %rax                  # AT&T order: destination is on the right
    movq    %rax, result(%rip)

    # --- Subtraction: %rcx = number1 - number2 ---
    movq    number1(%rip), %rcx
    subq    number2(%rip), %rcx         # memory operand used directly as source

    # --- Multiplication: %rax = number1 * number2 ---
    movq    number1(%rip), %rax
    movq    number2(%rip), %rbx
    imulq   %rbx, %rax

    # --- Bitwise: each step operates on the previous value of %rdx ---
    movq    $0xFF, %rdx
    andq    $0x0F, %rdx                 # 0xFF & 0x0F = 0x0F
    orq     $0xF0, %rdx                 # 0x0F | 0xF0 = 0xFF
    xorq    $0xFF, %rdx                 # 0xFF ^ 0xFF = 0x00

    # --- exit(0) ---
    movl    $SYS_EXIT, %eax             # writing %eax zero-extends into %rax
    xorl    %edi, %edi
    syscall

Conditional Branching and Jumps

# x86-64 conditional branching (AT&T syntax, Linux)
    .equ SYS_EXIT, 60                   # syscall number used to terminate

.section .data
    value1: .quad 10
    value2: .quad 20

.section .text
    .global _start

# Entry point: three-way signed comparison of value1 against value2.
# Leaves 1 (equal), 2 (greater) or 3 (less) in %rcx, then exits with 0.
_start:
    movq    value1(%rip), %rax
    movq    value2(%rip), %rbx
    cmpq    %rbx, %rax                  # flags reflect %rax - %rbx (value1 - value2)

    # Equal and greater are tested explicitly; "less" is the only
    # remaining outcome, so it is the fall-through path and needs no jump.
    je      .Lequal
    jg      .Lgreater                   # signed comparison

.Lless:
    movq    $3, %rcx
    jmp     .Ldone

.Lequal:
    movq    $1, %rcx
    jmp     .Ldone

.Lgreater:
    movq    $2, %rcx
    # falls straight into .Ldone

.Ldone:
    # exit(0)
    movl    $SYS_EXIT, %eax             # writing %eax zero-extends into %rax
    xorl    %edi, %edi
    syscall

Loop Processing

# Counter loop example (AT&T syntax, Linux)
    .equ SYS_EXIT, 60                   # syscall number used to terminate

.section .data
counter:    .quad 5
sum:        .quad 0

.section .text
    .global _start

# Entry point: adds counter, counter-1, ..., 1 into %rax, stores the
# total in sum, then exits with status 0.
_start:
    xorl    %eax, %eax                  # running total = 0 (zeroes all of %rax)
    movq    counter(%rip), %rcx         # %rcx = values still to be added

    testq   %rcx, %rcx                  # nothing to add when the counter starts at 0
    jz      .Lsum_done

.Lsum_next:                             # invariant: %rcx != 0 here
    addq    %rcx, %rax
    decq    %rcx                        # sets ZF when the counter reaches 0
    jnz     .Lsum_next

.Lsum_done:
    movq    %rax, sum(%rip)

    # exit(0)
    movl    $SYS_EXIT, %eax
    xorl    %edi, %edi
    syscall

# While loop style
#
# void while_loop_example(void)
# ABI:   System V AMD64
# In:    nothing
# Out:   nothing
# Stack: one 8-byte push, so %rsp is 16-byte aligned inside the loop body
#
# %rbx is callee-saved under the System V ABI, so it is saved on entry and
# restored before returning. It is kept as the loop counter so the counter
# would survive any call placed in the loop body.
while_loop_example:
    pushq   %rbx                        # preserve the caller's %rbx
    movq    $10, %rbx                   # Initial value

.Lwhile_check:
    testq   %rbx, %rbx                  # Condition check
    jle     .Lwhile_done                # Exit if <= 0 (signed)

    # Loop body processing
    # Some processing...

    decq    %rbx                        # Decrement value
    jmp     .Lwhile_check               # Continue loop

.Lwhile_done:
    popq    %rbx                        # restore the caller's %rbx
    ret                                 # Function return

Function Definition and Calling

# Function definition examples (x86-64, AT&T syntax)
.section .text
    .global _start
    .global add_numbers
    .global factorial

#-----------------------------------------------------------------------
# add_numbers: returns the sum of its two integer arguments
# In:    %rdi = first operand, %rsi = second operand
# Out:   %rax = %rdi + %rsi
#-----------------------------------------------------------------------
add_numbers:
    movq    %rsi, %rax                  # start from the second operand
    addq    %rdi, %rax                  # addition is commutative: same sum
    ret

#-----------------------------------------------------------------------
# factorial: recursive n!
# In:    %rdi = n
# Out:   %rax = n!  (1 for every n <= 1, compared as signed)
# Note:  %rdi holds the caller's n again on return
#-----------------------------------------------------------------------
factorial:
    cmpq    $1, %rdi
    jg      .Lfactorial_recurse         # n > 1: keep recursing

    # Base case: n <= 1
    movl    $1, %eax                    # writing %eax zero-extends into %rax
    ret

.Lfactorial_recurse:
    pushq   %rdi                        # keep n across the recursive call
    decq    %rdi                        # argument for the call: n - 1
    call    factorial                   # %rax = (n - 1)!
    popq    %rdi                        # n is back in %rdi

    imulq   %rdi, %rax                  # %rax = n * (n - 1)!
    ret

_start:
    # add_numbers(10, 20): the sum comes back in %rax
    movl    $10, %edi                   # 32-bit writes zero-extend into %rdi/%rsi
    movl    $20, %esi
    call    add_numbers

    # factorial(5): overwrites %rax with the new result
    movl    $5, %edi
    call    factorial

    # exit(0)
    movl    $60, %eax                   # sys_exit
    xorl    %edi, %edi                  # exit code 0
    syscall

Stack Operations and Local Variables

# Function using a frame-pointer based stack frame (x86-64, AT&T syntax)
.section .text
    .global complex_function

#-----------------------------------------------------------------------
# complex_function: demonstrates locals addressed relative to %rbp
# In:    nothing
# Out:   %rax = value of the third local (100 + 200)
# Frame: 32 bytes of locals below %rbp
#          -8(%rbp)   local 1
#          -16(%rbp)  local 2
#          -24(%rbp)  local 3
#          -32(%rbp)  local 4 (reserved, not written)
#-----------------------------------------------------------------------
complex_function:
    # Prologue: establish the frame and reserve the locals
    pushq   %rbp
    movq    %rsp, %rbp
    subq    $32, %rsp

    movq    $100, -8(%rbp)              # local 1 = 100
    movq    $200, -16(%rbp)             # local 2 = 200

    # local 3 = local 1 + local 2
    movq    -8(%rbp), %rax
    addq    -16(%rbp), %rax
    movq    %rax, -24(%rbp)

    # Save %r12 and %r13 around the scratch computation below
    pushq   %r12
    pushq   %r13

    movq    -24(%rbp), %r12
    imulq   $2, %r12, %r12              # %r12 = local 3 * 2 (scratch only)

    popq    %r13                        # pops mirror the pushes in reverse
    popq    %r12

    # Return value: local 3
    movq    -24(%rbp), %rax

    # Epilogue: leave is equivalent to movq %rbp, %rsp followed by popq %rbp
    leave
    ret

Inline Assembly (with C)

// Inline assembly within C language
#include <stdio.h>

int main() {
    int input = 10;
    int result;

    /* GCC extended asm with named operands: result = input * 2 + 5.
       %eax is the scratch register, so it is declared as clobbered and the
       compiler will not place either operand in it. */
    __asm__ __volatile__ (
        "movl %[src], %%eax\n\t"    /* eax = input   */
        "imull $2, %%eax\n\t"       /* eax *= 2      */
        "addl $5, %%eax\n\t"        /* eax += 5      */
        "movl %%eax, %[dst]"        /* result = eax  */
        : [dst] "=r" (result)
        : [src] "r" (input)
        : "eax"
    );

    printf("Result: %d\n", result); /* prints 25 for input == 10 */

    /* CPUID with eax = 0 on entry; the four output registers are bound
       through the "a", "b", "c" and "d" register constraints. */
    unsigned int eax, ebx, ecx, edx;
    __asm__ __volatile__ (
        "cpuid"
        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
        : "a" (0)
    );

    /* Printed in ebx, edx, ecx order. */
    printf("CPU ID: %08X %08X %08X\n", ebx, edx, ecx);

    return 0;
}

SIMD Instructions (SSE/AVX)

# SIMD (Single Instruction, Multiple Data) instruction examples
.section .data
    # Two vectors of four 32-bit floats each, plus 16 bytes for the output
    array1: .float 1.0, 2.0, 3.0, 4.0
    array2: .float 5.0, 6.0, 7.0, 8.0
    result: .zero 16

.section .text
    .global simd_example

#-----------------------------------------------------------------------
# simd_example: result[i] = array1[i] + array2[i] for i = 0..3
# In:    nothing
# Out:   16 bytes written to result
# Uses:  %xmm0, %xmm1
#-----------------------------------------------------------------------
simd_example:
    # movups is the unaligned-tolerant move; the arrays above carry no
    # alignment directive.
    movups  array2(%rip), %xmm1
    movups  array1(%rip), %xmm0

    addps   %xmm1, %xmm0                # four single-precision adds at once

    movups  %xmm0, result(%rip)

    # AVX instruction example (256-bit)
    # vmovups array1(%rip), %ymm0       # Eight 32-bit floating point numbers
    # vmovups array2(%rip), %ymm1
    # vaddps %ymm1, %ymm0, %ymm0        # AVX addition

    ret

Special Features

Direct Hardware Control

# Port I/O (x86 specific)
.section .text
    .global port_io_example

    .equ COM_PORT,      0x3F8           # COM port address
    .equ CHAR_H,        0x48            # ASCII code for 'H'
    .equ VGA_TEXT_BASE, 0xB8000         # VGA text memory address
    .equ CHAR_A,        0x41            # ASCII code for 'A'
    .equ VGA_ATTR,      0x07            # white text, black background

#-----------------------------------------------------------------------
# port_io_example: one port read, one port write, two memory-mapped writes
# In:    nothing
# Out:   nothing
# Uses:  %al, %dx, %rdi
#-----------------------------------------------------------------------
port_io_example:
    # Port-mapped I/O: in/out take the port number in %dx
    movw    $COM_PORT, %dx
    inb     %dx, %al                    # read one byte from the port

    movb    $CHAR_H, %al                # replaces the byte that was just read
    outb    %al, %dx                    # write one byte to the same port

    # Memory-mapped I/O: character byte first, attribute byte after it
    movl    $VGA_TEXT_BASE, %edi        # 32-bit write zero-extends into %rdi
    movb    $CHAR_A, (%rdi)
    movb    $VGA_ATTR, 1(%rdi)

    ret

Interrupt Handlers

# Interrupt handler example
.section .text
    .global interrupt_handler

    .equ PIC_COMMAND_PORT, 0x20         # port the EOI is written to
    .equ PIC_EOI,          0x20         # End of Interrupt command byte

#-----------------------------------------------------------------------
# interrupt_handler: saves ten general-purpose registers, sends EOI to
# the PIC, restores the registers and returns with iretq.
#-----------------------------------------------------------------------
interrupt_handler:
    # Save registers (one pushq per listed register, in this order)
    .irp reg, rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11
    pushq   %\reg
    .endr

    # Interrupt processing
    # Write actual interrupt processing here

    # Send EOI (End of Interrupt) to the PIC
    movb    $PIC_EOI, %al
    outb    %al, $PIC_COMMAND_PORT

    # Restore registers: exact reverse of the save order
    .irp reg, r11, r10, r9, r8, rdi, rsi, rdx, rcx, rbx, rax
    popq    %\reg
    .endr

    iretq                               # Return from interrupt

Atomic Operations

# Atomic operations in multiprocessor environment
.section .data
    shared_counter: .quad 0
    lock_var: .quad 0

.section .text
    .global atomic_increment

#-----------------------------------------------------------------------
# atomic_increment: demonstrates three atomic read-modify-write forms
# ABI:   System V AMD64
# In:    nothing
# Out:   shared_counter incremented by 1; lock_var == 1;
#        %rax = value lock_var held just before the final exchange
# Clobb: %rax, %rcx, flags (only caller-saved registers are touched)
#-----------------------------------------------------------------------
atomic_increment:
    # Atomic increment
    lock incq shared_counter(%rip)      # LOCK makes the read-modify-write atomic

    # Compare and Swap (CAS): if lock_var == %rax then lock_var = %rcx,
    # otherwise %rax receives the current value of lock_var
    xorl    %eax, %eax                  # Expected value: 0
    movl    $1, %ecx                    # New value: 1 (%rcx is caller-saved)
    lock cmpxchgq %rcx, lock_var(%rip)

    # Test and Set: swap 1 into lock_var, old value lands in %rax.
    # xchg with a memory operand is locked implicitly, so no LOCK prefix.
    movl    $1, %eax
    xchgq   %rax, lock_var(%rip)

    ret

Architecture-Specific Features

ARM Assembly (AArch64)

// ARM64 assembly example (GNU as, Linux AArch64)
.global _start

.section .data
    msg: .ascii "Hello, ARM!\n"
    msg_len = . - msg

.section .text
// Program entry point: writes msg to stdout, then exits with status 0.
_start:
    // Set system call number and arguments
    mov x8, #64                 // sys_write
    mov x0, #1                  // stdout
    adrp x1, msg                // page address of msg (PC-relative)
    add x1, x1, :lo12:msg       // plus the offset of msg within that page
    mov x2, #msg_len            // message length
    svc #0                      // execute system call

    // Program termination
    mov x8, #93                 // sys_exit
    mov x0, #0                  // exit code
    svc #0

// Three-way comparison in ARM64
// In:  x0, x1
// Out: x2 = 1 if x0 == x1, 2 if x0 > x1, 3 if x0 < x1 (signed compare)
conditional_example:
    cmp x0, x1
    b.gt greater         // signed greater-than
    b.eq equal
    // neither greater nor equal: fall through to less

less:
    mov x2, #3
    ret

greater:
    mov x2, #2
    ret

equal:
    mov x2, #1
    ret

Versions and Implementations

| Architecture | Instruction Set | Main Usage | Features |
| --- | --- | --- | --- |
| x86-64 | x86_64, AMD64 | Desktop, Server | CISC, Rich instructions |
| ARM | ARMv8-A (AArch64) | Mobile, Embedded | RISC, Power efficient |
| RISC-V | RV32I, RV64I | Academic, Embedded | Open source, Modular |
| MIPS | MIPS32, MIPS64 | Embedded, Education | RISC, Simple |
| PowerPC | PowerPC64 | Server, Embedded | RISC, High performance |

Reference Pages

Official Documentation

Learning Resources

Development Tools