Compile the following code into an executable named hello (for example: go build -o hello hello.go):
package main
// main is the user-level entry point of the sample program; it prints a
// greeting using the built-in println.
func main() {
println("Hello Go")
}
Find the entry point of the Go executable#
Use readelf to view the entry point address of the executable, then use the nm tool from the Go toolchain (go tool nm) to find the function name that corresponds to that address in the symbol table.
$ readelf -h ./hello
ELF Header:
...
Entry point address: 0x454020
...
$ go tool nm ./hello| grep 454020
454020 T _rt0_amd64_linux
Startup Phase#
In the Go source code, you can find the file that defines _rt0_amd64_linux. The work done at program start is written in platform-specific assembly, mainly in two files: runtime/rt0_linux_amd64.s and runtime/asm_amd64.s.
// _rt0_amd64_linux is the symbol found at the ELF entry point address on
// linux/amd64. It does no work itself and jumps straight to _rt0_amd64.
TEXT _rt0_amd64_linux(SB),NOSPLIT,$-8
JMP _rt0_amd64(SB)
// _rt0_amd64 loads argc from the top of the initial stack into DI and then
// jumps to runtime·rt0_go.
// NOTE(review): rt0_go later copies an argv value as well; the instruction
// that loads argv appears to be omitted from this excerpt — confirm against
// runtime/asm_amd64.s.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
MOVQ 0(SP), DI // argc
JMP runtime·rt0_go(SB)
// rt0_go is the common bootstrap routine. In order, it sets up the g0 stack
// bounds, installs TLS for m0, runs sanity checks, initializes arguments, the
// OS layer and the scheduler, queues runtime.main as the first goroutine, and
// finally enters the scheduler through mstart.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
// ... (prologue elided in this excerpt)
// Set up the stack space for g0: the range [SP-64KiB, SP) of the current
// OS stack becomes g0's stack, and both stack guards start at its low end.
MOVQ $runtime·g0(SB), DI
LEAQ (-64*1024)(SP), BX
MOVQ BX, g_stackguard0(DI)
MOVQ BX, g_stackguard1(DI)
MOVQ BX, (g_stack+stack_lo)(DI)
MOVQ SP, (g_stack+stack_hi)(DI)
// Use arch_prctl(ARCH_SET_FS) to set the TLS base address of the m0 thread to m0.tls
LEAQ runtime·m0+m_tls(SB), DI
CALL runtime·settls(SB)
// Perform basic correctness and safety checks,
// including: sizes of basic types, size of platform pointer types, CAS correctness
CALL runtime·check(SB)
// Copy argc/argv into the outgoing argument slots for runtime·args.
// NOTE(review): 24(SP) and 32(SP) are presumably filled in by the elided
// prologue — confirm against the full source.
MOVL 24(SP), AX // copy argc
MOVL AX, 0(SP)
MOVQ 32(SP), AX // copy argv
MOVQ AX, 8(SP)
CALL runtime·args(SB)
CALL runtime·osinit(SB)
CALL runtime·schedinit(SB)
// The address of the runtime.main function is passed to newproc.
// This adds runtime.main to the run queue of a P.
MOVQ $runtime·mainPC(SB), AX // entry
PUSHQ AX // push the argument for newproc onto the stack
CALL runtime·newproc(SB)
POPQ AX // pop the argument again after the call
// The main thread enters the scheduling loop:
// runtime.main will be scheduled for execution,
// and runtime.main will in turn call main.main.
CALL runtime·mstart(SB)
// mstart should not return; abort if it ever does
CALL runtime·abort(SB)
RET
// mstart is a thin assembly stub that calls the Go function mstart0.
// The RET after the call is not expected to execute.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
CALL runtime·mstart0(SB)
RET // not reached
runtime.args#
// runtime/runtime1.go
// args records the argument count c and argument vector v in the globals
// argc and argv, then hands both to sysargs for OS-specific processing.
func args(c int32, v **byte) {
argc = c
argv = v
sysargs(c, v)
}
This function mainly sets the two global variables argc and argv. sysargs then retrieves/sets the following by reading argv:
- startupRandomData: a 16-byte random data buffer set by the kernel (ld-linux.so)
- physPageSize: the system's physical page size
- Reads the ELF header to obtain string tables, symbol tables, dynamic linking, and vdso information
- On Linux, it reads the symbol table to set two special pointers for vdso calls:
// vdsoSymbolKeys lists the vDSO symbols the runtime looks up. Each entry
// names a symbol and carries a pointer to a package-level variable
// (presumably where the resolved address is stored — confirm).
// NOTE(review): the two hex values look like precomputed hashes of the symbol
// name — confirm against the vdsoSymbolKey type definition.
var vdsoSymbolKeys = []vdsoSymbolKey{
{"__vdso_gettimeofday", 0x315ca59, 0xb01bca00, &vdsoGettimeofdaySym},
{"__vdso_clock_gettime", 0xd35ec75, 0x6e43a318, &vdsoClockgettimeSym},
}
runtime.osinit#
// runtime/os_linux.go
// osinit performs the Linux-specific part of runtime startup: it records the
// processor count and the huge page size, then runs architecture-specific
// initialization.
func osinit() {
ncpu = getproccount() // schedinit later uses ncpu as the default number of Ps
physHugePageSize = getHugePageSize()
if iscgo {
// ... cgo signal-related handling (elided in this excerpt)
}
osArchInit()
}
osinit on Linux is relatively simple:
- Obtains the number of available processors through sched_getaffinity
- Obtains the physical size of transparent huge pages through /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
runtime.schedinit#
This function does the initialization work for the Go goroutine scheduler. Only the key parts of the code are shown here; calls to functions that are currently empty (not yet implemented) are omitted.
// runtime/proc.go
// schedinit bootstraps the scheduler and the runtime subsystems it depends
// on. rt0_go calls it after osinit and before newproc creates the first
// goroutine. It finishes by sizing the set of Ps through procresize.
func schedinit() {
gp := getg()
sched.maxmcount = 10000 // NOTE(review): presumably the upper limit on the number of Ms (threads) — confirm
moduledataverify() // verify module data
stackinit() // initialize the goroutine stack memory pools
mallocinit() // initialize the memory allocator
alginit() // initialize hardware (AES) support for the built-in algorithms
fastrandinit() // seed the random number generator from the startupRandomData read earlier
mcommoninit(gp.m, -1) // initialize the M: assign its ID, set up its signal-handling goroutine and its fast random seed
modulesinit() // walk every module to compute the sizes of the globals scanned by the GC
typelinksinit() // walk every module to collect type link information
itabsinit() // initialize the itab table from the typelinks
stkobjinit() // initialize GC-related stack object data
sigsave(&gp.m.sigmask) // save the thread's signal mask
goargs() // set the global variable argslice
goenvs() // set the global variable envs
parsedebugvars() // read the GODEBUG environment variable to configure debug/trace settings
gcinit() // initialize the GC
// Initialize allp: default to ncpu (set in osinit), unless GOMAXPROCS
// parses as a positive integer, in which case that value wins.
procs := ncpu
if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
procs = n
}
// procresize returns the Ps that already have runnable work; during
// bootstrap there must be none, so a non-nil result is fatal.
if procresize(procs) != nil {
throw("unknown runnable goroutine during bootstrap")
}
}
runtime.getg#
The compiler lowers a call to runtime.getg into a single assembly instruction that loads the current *g pointer from thread-local storage (TLS). That TLS slot is set to the current goroutine's data pointer *g when the goroutine is started through runtime.gogo.
// How the compiler lowers getg on amd64:
// ir.OGETG -> ssa.OpGetG -> ssa.OpAMD64LoweredGetG -> MOVQ (TLS), r
// gogo is the public entry point; after the elided setup it jumps to the
// file-local gogo<>.
TEXT runtime·gogo(SB), NOSPLIT, $0-8
// ... (setup elided in this excerpt)
JMP gogo<>(SB)
// gogo<> (excerpt): stores DX into the g slot of thread-local storage.
// NOTE(review): DX presumably holds the *g being resumed, loaded by the
// elided code in gogo — confirm against runtime/asm_amd64.s.
TEXT gogo<>(SB), NOSPLIT, $0
get_tls(CX) // CX = TLS base
MOVQ DX, g(CX) // TLS g slot = DX
runtime.procresize#
// runtime/proc.go
// procresize changes the number of Ps to nprocs. It creates and initializes
// new Ps, makes sure the calling M holds a P that survives the resize,
// destroys surplus Ps, and returns a linked list (chained through p.link) of
// the Ps other than the current one that have runnable goroutines queued.
// NOTE(review): old and now are not defined in this excerpt; they come from
// the elided first half of the function — confirm against the full source.
func procresize(nprocs int32) *p {
// The first half (elided) mainly modifies:
// 1. allp []*p
// 2. idlepMask: idle-P mask, each bit indicates whether the P at that index in allp is idle
// 3. timerpMask: mask of Ps that may have timers, each bit indicates whether the P at that index in allp has a timer
// When growing the number of procs, initialize the newly created Ps.
// At program start allp is empty, so every P is created and initialized here.
for i := old; i < nprocs; i++ {
pp := allp[i]
if pp == nil {
pp = new(p)
}
pp.init(i)
atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
}
// Surplus Ps are destroyed further down when shrinking, so first make
// sure the current P is not one of those about to be destroyed.
// If it is (or if there is no current P), switch to allp[0].
gp := getg()
if gp.m.p != 0 && gp.m.p.ptr().id < nprocs {
// Continue using the current P
gp.m.p.ptr().status = _Prunning
gp.m.p.ptr().mcache.prepareForSweep()
} else {
if gp.m.p != 0 {
// Detach the old P from this M before giving it up
gp.m.p.ptr().m = 0
}
gp.m.p = 0
pp := allp[0]
pp.m = 0
pp.status = _Pidle
acquirep(pp)
}
// Clean up the old Ps when the number of procs shrinks
for i := nprocs; i < old; i++ {
pp := allp[i]
pp.destroy()
}
// Build the linked list of runnable Ps to return
var runnablePs *p
for i := nprocs - 1; i >= 0; i-- {
pp := allp[i]
if gp.m.p.ptr() == pp {
// Skip the P the current M is running on
continue
}
pp.status = _Pidle
if runqempty(pp) {
// No runnable g on this P's local queue: hand it to pidleput as an idle P
pidleput(pp, now)
} else {
// Find an idle M for this P; mget may return nothing, so m may be empty
pp.m.set(mget())
pp.link.set(runnablePs)
runnablePs = pp
}
}
if old != nprocs {
// If the number of procs changed, update the proc capacity available to the GC (by default 25% of the proc count)
gcCPULimiter.resetCapacity(now, nprocs)
}
return runnablePs
}
runtime.newproc#
newproc takes a function (address), creates a new g for it, and places that g on the local run queue of the current p; if mainStarted is already set, it then calls wakep so that a p can be woken up to run it.
During program startup, the address of runtime.main is passed here.
// newproc creates a new goroutine that will run fn and puts it on the run
// queue of the current P. The creation itself is done inside systemstack.
func newproc(fn *funcval) {
gp := getg() // the calling g; newproc1 records its goid as the new g's parent
pc := getcallerpc() // the caller's PC; newproc1 records it as the new g's gopc
systemstack(func() {
newg := newproc1(fn, gp, pc)
pp := getg().m.p.ptr()
runqput(pp, newg, true)
if mainStarted {
// mainStarted is set in runtime.main, so wakep is skipped
// for goroutines created before runtime.main has run
wakep()
}
})
}
// newproc1 builds a new g in the _Grunnable state that will start executing
// fn, and returns it. callergp and callerpc identify the goroutine and call
// site that requested the new goroutine. The caller is responsible for
// putting the returned g on a run queue.
func newproc1(fn *funcval, callergp *g, callerpc uintptr) *g {
mp := acquirem()
pp := mp.p.ptr()
newg := gfget(pp) // first try to reuse a g from the P's free-g list
if newg == nil {
// None available: create a new g with malg.
// Its stack has not been set up for scanning yet, so move it to the
// dead state first to keep the GC from scanning it,
// then add it to the global allg list.
newg = malg(_StackMin)
casgstatus(newg, _Gidle, _Gdead)
allgadd(newg)
}
// Compute the initial stack pointer position
totalSize := uintptr(4*goarch.PtrSize + sys.MinFrameSize) // extra space in case of reads slightly beyond frame
totalSize = alignUp(totalSize, sys.StackAlign)
sp := newg.stack.hi - totalSize
// Set up newg's scheduling data: stack pointer, function address, program counter, caller information, etc.
memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
newg.sched.sp = sp
newg.stktopsp = sp
newg.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
newg.sched.g = guintptr(unsafe.Pointer(newg))
gostartcallfn(&newg.sched, fn)
newg.parentGoid = callergp.goid
newg.gopc = callerpc
newg.ancestors = saveAncestors(callergp)
newg.startpc = fn.fn
// Switch the status to runnable and account for the stack in the GC's scannable stack size
casgstatus(newg, _Gdead, _Grunnable)
gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo))
// Assign the goroutine ID from the P's goid cache
newg.goid = pp.goidcache
pp.goidcache++
releasem(mp)
return newg
}
runtime.mstart0#
// mstart0 is the Go entry point called by the assembly stub mstart. It
// initializes the stack guards of the current g, runs mstart1, and then
// calls mexit.
// NOTE(review): osStack is not defined in this excerpt; it comes from elided
// code — confirm against the full source.
func mstart0() {
gp := getg()
// Initialize g0's stack guards, used for stack overflow and stack growth checks
gp.stackguard0 = gp.stack.lo + _StackGuard
gp.stackguard1 = gp.stackguard0
mstart1()
mexit(osStack)
}
// mstart1 finishes starting the current M: it records a return point in
// the current g's sched, initializes the thread, runs the M's start function if
// it has one, binds a P (for every M except m0), and enters the scheduler.
func mstart1() {
gp := getg()
// Set up m.g0.sched as a label returning to just
// after the mstart1 call in mstart0 above, for use by goexit0 and mcall.
// We're never coming back to mstart1 after we call schedule,
// so other calls can reuse the current frame.
// And goexit0 does a gogo that needs to return from mstart1
// and let mstart0 exit the thread.
gp.sched.g = guintptr(unsafe.Pointer(gp))
gp.sched.pc = getcallerpc()
gp.sched.sp = getcallersp()
// Initialize the thread's signal-handling goroutine stack and signal mask
minit()
if gp.m == &m0 {
// Set the thread's signal handling function sighandler
mstartm0()
}
// Some internal threads such as sysmon start their work directly here
if fn := gp.m.mstartfn; fn != nil {
fn()
}
// Every M other than m0 picks up the P stored in nextp
if gp.m != &m0 {
acquirep(gp.m.nextp.ptr())
gp.m.nextp = 0
}
// Enter the scheduling loop; it never returns.
// At this point during startup there is only one P and one g,
// so the scheduler ends up jumping to runtime.main.
schedule()
}
// main is runtime.main, the function whose address rt0_go passes to newproc,
// so it runs as the first goroutine. It runs the runtime and user init
// tasks, enables the GC, calls the user's main.main, and then exits the
// process with status 0.
func main() {
mp := getg().m
// Set the maximum stack size: 1 GB when pointers are 8 bytes, 250 MB otherwise
if goarch.PtrSize == 8 {
maxstacksize = 1000000000
} else {
maxstacksize = 250000000
}
// From here on, newproc is allowed to call wakep, so goroutines it
// creates can find or start a thread to run on
mainStarted = true
// Run the runtime package's init tasks and initialize its global variables
doInit(&runtime_inittask)
// Enable the GC
gcenable()
// Run the user-level init functions and initialize user global variables
doInit(&main_inittask)
// With -buildmode=c-archive or c-shared, main.main is not executed
if isarchive || islibrary {
return
}
// Call the user-level main.main function
fn := main_main
fn()
// Before the main goroutine exits: if other goroutines are still running
// deferred calls during a panic, give them a chance to finish first
// (for example to print the panic message). The wait is bounded to
// 1000 Gosched calls.
if runningPanicDefers.Load() != 0 {
for c := 0; c < 1000; c++ {
if runningPanicDefers.Load() == 0 {
break
}
Gosched()
}
}
// Run the registered exit hooks before the process exits,
// for example writing code coverage data when built with -cover
runExitHooks(0)
exit(0)
}