#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>

/* I/O base. Note that this is the ARM physical address, not the 
 * virtual one or the "VC CPU Bus addresses".
 * As we are using /dev/mem for access, this should be the 
 * right address.
 */

/*
 *  This version uses memory-barriers
 *  Info taken from 
 *  http://www.jonmasters.org/blog/2012/11/13/arm-atomic-operations/
 */

/*
 * Data memory barrier issued as a CP15 write (c7, c10, 5), for cores
 * that lack a dedicated DMB instruction.
 *
 * The value written by this operation should be zero, so an explicit 0
 * is passed as an input operand rather than using whatever happens to
 * be in r0 at the call site. The "memory" clobber additionally stops
 * the compiler from moving memory accesses across the barrier.
 *
 * RMB() and WMB() are aliases: the same full barrier is used for both
 * read and write ordering.
 */
#define MB()  __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#define RMB() MB()
#define WMB() MB()

/* Base of the peripheral I/O window, used as an offset into /dev/mem.
 * NOTE(review): 0x20000000 looks like the BCM2835 (Pi 1 / Zero) peripheral
 * base; other SoCs use a different base -- confirm for the target board. */
#define IO_BASE          0x20000000 
/* PADS. They are in the power-management area, not the gpio area */  
#define GPIO_PADS        (IO_BASE + 0x100000)
/* Length passed to mmap() for the pads mapping. The mmap offset (GPIO_PADS)
 * must be a multiple of the page size.
 * NOTE(review): assumes the system page size is 4096 -- confirm. */
#define PAGE_SIZE 	 4096

/*
 * Offset of the GPIO drive strength registers is
 * 0x2c for GPIO 0-27
 * 0x30 for GPIO 28-45 
 * 0x34 for GPIO 46-53
 * Or in other words: GPIO_PADS + 11 + index,
 * with index in {0, 1, 2} and the pointer arithmetic done on a
 * (uint32_t *), i.e. in units of 4 bytes (11 * 4 = 0x2c).
 * 
 * Note that reading/writing different peripherals
 * in sequence can lead to the results being returned out of order.
 * The ARM core cannot deal with that correctly, so
 * do not do it!
 * The solution is to insert memory-barriers via some
 * assembler wizardry.
 */ 

  
/*
 * Map the pads control page from /dev/mem, read the three GPIO drive
 * strength registers (word offsets 11, 12 and 13 from GPIO_PADS) and
 * print their values in hex.
 *
 * Returns EXIT_SUCCESS on success. If /dev/mem cannot be opened or the
 * page cannot be mapped, a message is printed with perror() and
 * EXIT_FAILURE is returned.
 */
int main(void) {
  uint32_t v0, v1, v2;
  int mem_file;
  void *map;
  volatile uint32_t *pads; // must be volatile as externally changed

  mem_file = open("/dev/mem", O_RDWR | O_SYNC);
  if (mem_file == -1) {
    perror("error opening /dev/mem");
    return EXIT_FAILURE;
  }

  map = mmap(NULL,        // we do not care where this is mapped
             PAGE_SIZE,   // length to be mapped
             PROT_READ | PROT_WRITE, // allow reading and writing
             MAP_SHARED,
             mem_file,    // file to map, i.e. physical memory map
             GPIO_PADS);  // Offset in the file. Multiple of PAGE_SIZE!

  // mmap reports failure with MAP_FAILED; compare the pointer itself
  // instead of truncating it to a 32-bit integer.
  if (map == MAP_FAILED) {
    perror("mapping failed"); // report first: close() may change errno
    close(mem_file);
    return EXIT_FAILURE;
  }
  pads = (volatile uint32_t *) map;
  printf("mapped to: %p\n", map);

  // Barriers around the register reads keep them from being mixed up
  // with accesses to other peripherals.
  MB();
  v0 = *(pads + 11 + 0);
  v1 = *(pads + 11 + 1);
  v2 = *(pads + 11 + 2);
  MB();

  printf("v0:  %08x\n", (unsigned int) v0);
  printf("v1:  %08x\n", (unsigned int) v1);
  printf("v2:  %08x\n", (unsigned int) v2);

  // Release the mapping and the descriptor before exiting.
  munmap(map, PAGE_SIZE);
  close(mem_file);
  return EXIT_SUCCESS;
}