理解Docker容器网络之Linux Network Namespace
一、Docker的CNM网络模型

- Sandbox(沙盒):每个沙盒包含一个容器网络栈(network stack)的配置,配置包括:容器的网口、路由表和DNS设置等。
- Endpoint(端点):通过Endpoint,沙盒可以被加入到一个Network里。
- Network(网络):一组能相互直接通信的Endpoints。
# ip link show
... ...
3: docker0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default
link/ether 02:42:30:11:98:ef brd ff:ff:ff:ff:ff:ff
19: veth4559467@if18: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master docker0 state UP mode DEFAULT group default
link/ether a6:14:99:52:78:35 brd ff:ff:ff:ff:ff:ff link-netnsid 3
... ...
$ brctl show
bridge name bridge id STP enabled interfaces
... ...
docker0 8000.0242301198ef no veth4559467
二、Linux Bridge、VETH和Network Namespace
三、用Network namespace模拟Docker容器网络
1、环境
Client:
Version: 1.12.1
API version: 1.24
Go version: go1.6.3
Git commit: 23cf638
Built: Thu Aug 18 05:33:38 2016
OS/Arch: linux/amd64
Server:
Version: 1.12.1
API version: 1.24
Go version: go1.6.3
Git commit: 23cf638
Built: Thu Aug 18 05:33:38 2016
OS/Arch: linux/amd64
2、拓扑
$ sudo ip netns add Container_ns1
$ sudo ip netns add Container_ns2
$ sudo ip netns list
Container_ns2
Container_ns1
$ sudo ls /var/run/netns
Container_ns1 Container_ns2
$ sudo ip netns exec Container_ns1 ip a
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
$ sudo ip netns exec Container_ns2 ip a
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
$ sudo ip netns exec Container_ns2 ip route
$ sudo brctl addbr MyDocker0
$ brctl show
bridge name bridge id STP enabled interfaces
MyDocker0 8000.000000000000 no
$ sudo ip addr add 172.16.1.254/16 dev MyDocker0
$ sudo ip link set dev MyDocker0 up
$ route -n
内核 IP 路由表
目标 网关 子网掩码 标志 跃点 引用 使用 接口
0.0.0.0 10.11.36.1 0.0.0.0 UG 100 0 0 eno1
... ...
172.16.0.0 0.0.0.0 255.255.0.0 U 0 0 0 MyDocker0
... ...
$ sudo ip link add veth1 type veth peer name veth1p
$ sudo ip -d link show
... ...
21: veth1p@veth1: <BROADCAST,MULTICAST,M-DOWN> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 66:6d:e7:75:3f:43 brd ff:ff:ff:ff:ff:ff promiscuity 0
veth addrgenmode eui64
22: veth1@veth1p: <BROADCAST,MULTICAST,M-DOWN> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/ether 56:cd:bb:f2:10:3f brd ff:ff:ff:ff:ff:ff promiscuity 0
veth addrgenmode eui64
... ...
$ sudo brctl addif MyDocker0 veth1
$ sudo ip link set veth1 up
$ brctl show
bridge name bridge id STP enabled interfaces
MyDocker0 8000.56cdbbf2103f no veth1
$ sudo ip link set veth1p netns Container_ns1
$ sudo ip netns exec Container_ns1 ip a
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
21: veth1p@if22: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
link/ether 66:6d:e7:75:3f:43 brd ff:ff:ff:ff:ff:ff link-netnsid 0
$ sudo ip netns exec Container_ns1 ip link set veth1p name eth0
$ sudo ip netns exec Container_ns1 ip a
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
21: eth0@if22: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
link/ether 66:6d:e7:75:3f:43 brd ff:ff:ff:ff:ff:ff link-netnsid 0
$ sudo ip netns exec Container_ns1 ip link set eth0 up
$ sudo ip netns exec Container_ns1 ip addr add 172.16.1.1/16 dev eth0
$ sudo ip netns exec Container_ns1 ip route
172.16.0.0/16 dev eth0 proto kernel scope link src 172.16.1.1
$ sudo ip netns exec Container_ns1 ping -c 3 172.16.1.254
PING 172.16.1.254 (172.16.1.254) 56(84) bytes of data.
64 bytes from 172.16.1.254: icmp_seq=1 ttl=64 time=0.074 ms
64 bytes from 172.16.1.254: icmp_seq=2 ttl=64 time=0.064 ms
64 bytes from 172.16.1.254: icmp_seq=3 ttl=64 time=0.068 ms
--- 172.16.1.254 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 1998ms
rtt min/avg/max/mdev = 0.064/0.068/0.074/0.010 ms
$ sudo ip netns exec Container_ns1 ping -c 3 172.17.0.1
connect: Network is unreachable
$ sudo ip netns exec Container_ns1 ip route add default via 172.16.1.254
$ sudo ip netns exec Container_ns1 ip route
default via 172.16.1.254 dev eth0
172.16.0.0/16 dev eth0 proto kernel scope link src 172.16.1.1
$ sudo ip netns exec Container_ns1 ping -c 3 172.17.0.1
PING 172.17.0.1 (172.17.0.1) 56(84) bytes of data.
64 bytes from 172.17.0.1: icmp_seq=1 ttl=64 time=0.068 ms
64 bytes from 172.17.0.1: icmp_seq=2 ttl=64 time=0.076 ms
64 bytes from 172.17.0.1: icmp_seq=3 ttl=64 time=0.069 ms
--- 172.17.0.1 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 1999ms
rtt min/avg/max/mdev = 0.068/0.071/0.076/0.003 ms
$ sudo ip link add veth2 type veth peer name veth2p
$ sudo brctl addif MyDocker0 veth2
$ sudo ip link set veth2 up
$ sudo ip link set veth2p netns Container_ns2
$ sudo ip netns exec Container_ns2 ip link set veth2p name eth0
$ sudo ip netns exec Container_ns2 ip link set eth0 up
$ sudo ip netns exec Container_ns2 ip addr add 172.16.1.2/16 dev eth0
$ sudo ip netns exec Container_ns2 ip route add default via 172.16.1.254
$ sudo ip netns exec Container_ns2 ping -c 3 172.16.1.1
PING 172.16.1.1 (172.16.1.1) 56(84) bytes of data.
64 bytes from 172.16.1.1: icmp_seq=1 ttl=64 time=0.101 ms
64 bytes from 172.16.1.1: icmp_seq=2 ttl=64 time=0.083 ms
64 bytes from 172.16.1.1: icmp_seq=3 ttl=64 time=0.087 ms
--- 172.16.1.1 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 1998ms
rtt min/avg/max/mdev = 0.083/0.090/0.101/0.010 ms
$ sudo ip netns exec Container_ns1 ping -c 3 172.16.1.2
PING 172.16.1.2 (172.16.1.2) 56(84) bytes of data.
64 bytes from 172.16.1.2: icmp_seq=1 ttl=64 time=0.053 ms
64 bytes from 172.16.1.2: icmp_seq=2 ttl=64 time=0.092 ms
64 bytes from 172.16.1.2: icmp_seq=3 ttl=64 time=0.089 ms
--- 172.16.1.2 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 1999ms
rtt min/avg/max/mdev = 0.053/0.078/0.092/0.017 ms
$ sudo ip netns exec Container_ns1 ip route
default via 172.16.1.254 dev eth0
172.16.0.0/16 dev eth0 proto kernel scope link src 172.16.1.1
$ sudo ip netns exec Container_ns1 traceroute 172.17.0.2
traceroute to 172.17.0.2 (172.17.0.2), 30 hops max, 60 byte packets
1 172.16.1.254 (172.16.1.254) 0.082 ms 0.023 ms 0.019 ms
2 172.17.0.2 (172.17.0.2) 0.054 ms 0.034 ms 0.029 ms
$ sudo ip netns exec Container_ns1 ping -c 3 172.17.0.2
PING 172.17.0.2 (172.17.0.2) 56(84) bytes of data.
64 bytes from 172.17.0.2: icmp_seq=1 ttl=63 time=0.084 ms
64 bytes from 172.17.0.2: icmp_seq=2 ttl=63 time=0.101 ms
64 bytes from 172.17.0.2: icmp_seq=3 ttl=63 time=0.098 ms
--- 172.17.0.2 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 1998ms
rtt min/avg/max/mdev = 0.084/0.094/0.101/0.010 ms
四、基于userland proxy的容器端口映射的模拟
$ sudo docker run -d -p 9091:80 nginx:latest
8eef60e3d7b48140c20b11424ee8931be25bc47b5233aa42550efabd5730ac2f
$ curl 10.11.36.15:9091
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
body {
width: 35em;
margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif;
}
</style>
</head>
<body>
<h1>Welcome to nginx!</h1>
<p>If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.</p>
<p>For online documentation and support please refer to
<a href="http://nginx.org/">nginx.org</a>.<br/>
Commercial support is available at
<a href="http://nginx.com/">nginx.com</a>.</p>
<p><em>Thank you for using nginx.</em></p>
</body>
</html>
$ ps -ef|grep docker-proxy
root 26246 6228 0 16:18 ? 00:00:00 /usr/bin/docker-proxy -proto tcp -host-ip 0.0.0.0 -host-port 9091 -container-ip 172.17.0.2 -container-port 80
//testfileserver.go
// testfileserver is a minimal static file server used to simulate a
// containerized service inside a network namespace: it serves the
// current working directory over HTTP on port 8080.
package main

import (
	"log"
	"net/http"
)

func main() {
	// ListenAndServe always returns a non-nil error (e.g. when the
	// port is already in use); log it fatally instead of silently
	// discarding it as the original did.
	log.Fatal(http.ListenAndServe(":8080", http.FileServer(http.Dir("."))))
}
$ sudo ip netns exec Container_ns1 ./testfileserver
$ sudo ip netns exec Container_ns1 lsof -i tcp:8080
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
testfiles 3605 root 3u IPv4 297022 0t0 TCP *:http-alt (LISTEN)
//proxy.go
... ...
// Command-line configuration shared by main and handleConnection:
// the proxy listens on host:port and forwards every accepted
// connection to container:containerport (mirroring docker-proxy's
// -host-ip/-host-port/-container-ip/-container-port flags).
var (
	host          string // address the proxy listens on
	port          string // port the proxy listens on (no default; must be set)
	container     string // target (container) IP address (no default; must be set)
	containerport string // target (container) port, defaults to 8080
)
// main parses the proxy's flags, prints the effective configuration,
// and runs a simple accept loop that spawns one goroutine per client
// connection — a hand-rolled equivalent of Docker's userland
// docker-proxy process.
func main() {
	flag.StringVar(&host, "host", "0.0.0.0", "host addr")
	flag.StringVar(&port, "port", "", "host port")
	flag.StringVar(&container, "container", "", "container addr")
	flag.StringVar(&containerport, "containerport", "8080", "container port")
	flag.Parse()

	// Bug fix: the original format string ended with a bare %s, so the
	// last flag value was glued to the next log line (the transcript
	// shows "80802017/01/11 ... listen ok"). A trailing \n separates them.
	fmt.Printf("%s\n%s\n%s\n%s\n", host, port, container, containerport)

	ln, err := net.Listen("tcp", host+":"+port)
	if err != nil {
		log.Println("listen error:", err)
		return
	}
	log.Println("listen ok")

	for {
		conn, err := ln.Accept()
		if err != nil {
			// Accept can fail transiently; keep serving.
			log.Println("accept error:", err)
			continue
		}
		log.Println("accept conn", conn)
		go handleConnection(conn)
	}
}
// handleConnection proxies bytes in both directions between the
// accepted client connection and a freshly dialed connection to the
// container, then closes both sides. Each invocation runs in its own
// goroutine (started by main's accept loop).
func handleConnection(conn net.Conn) {
	// Bug fix: the original never closed either connection, leaking
	// two file descriptors per proxied connection.
	defer conn.Close()

	cli, err := net.Dial("tcp", container+":"+containerport)
	if err != nil {
		log.Println("dial error:", err)
		return
	}
	defer cli.Close()
	log.Println("dial ", container+":"+containerport, " ok")

	// container -> client runs concurrently; client -> container is
	// pumped here until EOF or error, after which the deferred Close
	// calls unblock the background copy as well.
	go io.Copy(conn, cli)
	_, err = io.Copy(cli, conn)
	fmt.Println("communication over: error:", err)
}
./proxy -host 0.0.0.0 -port 9090 -container 172.16.1.1 -containerport 8080
0.0.0.0
9090
172.16.1.1
80802017/01/11 17:26:10 listen ok
$ curl 10.11.36.15:9090
<pre>
<a href="proxy">proxy</a>
<a href="proxy.go">proxy.go</a>
<a href="testfileserver">testfileserver</a>
<a href="testfileserver.go">testfileserver.go</a>
</pre>
2017/01/11 17:26:16 accept conn &{{0xc4200560e0}}
2017/01/11 17:26:16 dial 172.16.1.1:8080 ok
communication over: error:<nil>