From 69b3be763a9b1f85c88f2e42f2563a0769c07938 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 06:49:47 +0000 Subject: [PATCH] build(deps): bump github.com/vishvananda/netlink from 1.1.0 to 1.3.0 Bumps [github.com/vishvananda/netlink](https://github.com/vishvananda/netlink) from 1.1.0 to 1.3.0. - [Release notes](https://github.com/vishvananda/netlink/releases) - [Commits](https://github.com/vishvananda/netlink/compare/v1.1.0...v1.3.0) --- updated-dependencies: - dependency-name: github.com/vishvananda/netlink dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 4 +- go.sum | 11 +- .../github.com/vishvananda/netlink/.gitignore | 1 + .../vishvananda/netlink/.travis.yml | 19 - .../github.com/vishvananda/netlink/README.md | 2 +- vendor/github.com/vishvananda/netlink/addr.go | 1 + .../vishvananda/netlink/addr_linux.go | 98 +- .../vishvananda/netlink/bpf_linux.go | 24 + .../vishvananda/netlink/bridge_linux.go | 45 +- .../github.com/vishvananda/netlink/chain.go | 22 + .../vishvananda/netlink/chain_linux.go | 112 ++ .../github.com/vishvananda/netlink/class.go | 58 +- .../vishvananda/netlink/class_linux.go | 40 +- .../vishvananda/netlink/conntrack_linux.go | 612 +++++++++- .../netlink/conntrack_unspecified.go | 19 + .../vishvananda/netlink/devlink_linux.go | 897 +++++++++++++- .../github.com/vishvananda/netlink/filter.go | 184 ++- .../vishvananda/netlink/filter_linux.go | 535 ++++++-- .../vishvananda/netlink/handle_linux.go | 45 +- .../vishvananda/netlink/handle_unspecified.go | 32 +- .../vishvananda/netlink/inet_diag.go | 40 + .../vishvananda/netlink/ipset_linux.go | 581 +++++++++ vendor/github.com/vishvananda/netlink/link.go | 439 ++++++- .../vishvananda/netlink/link_linux.go | 1087 +++++++++++++++-- .../github.com/vishvananda/netlink/neigh.go | 1 + .../vishvananda/netlink/neigh_linux.go | 85 +- .../netlink/netlink_unspecified.go | 54 +- .../vishvananda/netlink/netns_linux.go | 6 +- .../vishvananda/netlink/nl/addr_linux.go | 14 +- .../vishvananda/netlink/nl/conntrack_linux.go | 45 +- .../vishvananda/netlink/nl/devlink_linux.go | 118 +- .../vishvananda/netlink/nl/ip6tnl_linux.go | 21 + .../vishvananda/netlink/nl/ipset_linux.go | 227 ++++ .../vishvananda/netlink/nl/link_linux.go | 216 +++- .../vishvananda/netlink/nl/lwt_linux.go | 29 + .../vishvananda/netlink/nl/nl_linux.go | 346 +++++- .../netlink/nl/parse_attr_linux.go | 79 ++ .../vishvananda/netlink/nl/rdma_link_linux.go | 4 + .../vishvananda/netlink/nl/route_linux.go | 4 +- .../vishvananda/netlink/nl/seg6_linux.go | 4 +- .../vishvananda/netlink/nl/seg6local_linux.go | 4 + .../vishvananda/netlink/nl/syscall.go | 10 +- .../vishvananda/netlink/nl/tc_linux.go | 747 ++++++++++- .../vishvananda/netlink/nl/vdpa_linux.go | 41 + .../vishvananda/netlink/nl/xfrm_linux.go | 10 +- .../netlink/nl/xfrm_state_linux.go | 29 +- .../vishvananda/netlink/proc_event_linux.go | 208 ++++ .../vishvananda/netlink/protinfo.go | 24 +- .../vishvananda/netlink/protinfo_linux.go | 4 + .../github.com/vishvananda/netlink/qdisc.go | 92 +- .../vishvananda/netlink/qdisc_linux.go | 148 ++- .../vishvananda/netlink/rdma_link_linux.go | 97 +- .../github.com/vishvananda/netlink/route.go | 94 +- .../vishvananda/netlink/route_linux.go | 946 ++++++++++++-- .../vishvananda/netlink/route_unspecified.go | 10 + vendor/github.com/vishvananda/netlink/rule.go | 50 +- .../vishvananda/netlink/rule_linux.go | 159 ++- .../vishvananda/netlink/rule_nonlinux.go | 8 + .../github.com/vishvananda/netlink/socket.go | 77 ++ .../vishvananda/netlink/socket_linux.go | 484 +++++++- .../vishvananda/netlink/socket_xdp_linux.go | 195 +++ vendor/github.com/vishvananda/netlink/tcp.go | 92 ++ .../vishvananda/netlink/tcp_linux.go | 368 ++++++ .../vishvananda/netlink/unix_diag.go | 27 + .../vishvananda/netlink/vdpa_linux.go | 463 +++++++ .../github.com/vishvananda/netlink/virtio.go | 132 ++ .../vishvananda/netlink/xdp_diag.go | 34 + .../vishvananda/netlink/xdp_linux.go | 46 + .../netlink/{xfrm.go => xfrm_linux.go} | 2 +- .../vishvananda/netlink/xfrm_policy.go | 96 -- .../vishvananda/netlink/xfrm_policy_linux.go | 113 +- .../vishvananda/netlink/xfrm_state.go | 131 -- .../vishvananda/netlink/xfrm_state_linux.go | 236 +++- .../vishvananda/netlink/xfrm_unspecified.go | 7 + .../vishvananda/netns/.golangci.yml | 2 + vendor/github.com/vishvananda/netns/README.md | 1 + vendor/github.com/vishvananda/netns/doc.go | 9 + .../vishvananda/netns/netns_linux.go | 180 ++- .../{netns_unspecified.go => netns_others.go} | 17 + .../netns/{netns.go => nshandle_linux.go} | 31 +- .../vishvananda/netns/nshandle_others.go | 45 + vendor/modules.txt | 6 +- 82 files changed, 10573 insertions(+), 1063 deletions(-) delete mode 100644 vendor/github.com/vishvananda/netlink/.travis.yml create mode 100644 vendor/github.com/vishvananda/netlink/chain.go create mode 100644 vendor/github.com/vishvananda/netlink/chain_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/inet_diag.go create mode 100644 vendor/github.com/vishvananda/netlink/ipset_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/ipset_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/lwt_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/proc_event_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/rule_nonlinux.go create mode 100644 vendor/github.com/vishvananda/netlink/socket_xdp_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/tcp.go create mode 100644 vendor/github.com/vishvananda/netlink/tcp_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/unix_diag.go create mode 100644 vendor/github.com/vishvananda/netlink/vdpa_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/virtio.go create mode 100644 vendor/github.com/vishvananda/netlink/xdp_diag.go create mode 100644 vendor/github.com/vishvananda/netlink/xdp_linux.go rename vendor/github.com/vishvananda/netlink/{xfrm.go => xfrm_linux.go} (95%) delete mode 100644 vendor/github.com/vishvananda/netlink/xfrm_policy.go delete mode 100644 vendor/github.com/vishvananda/netlink/xfrm_state.go create mode 100644 vendor/github.com/vishvananda/netlink/xfrm_unspecified.go create mode 100644 vendor/github.com/vishvananda/netns/.golangci.yml create mode 100644 vendor/github.com/vishvananda/netns/doc.go rename vendor/github.com/vishvananda/netns/{netns_unspecified.go => netns_others.go} (63%) rename vendor/github.com/vishvananda/netns/{netns.go => nshandle_linux.go} (60%) create mode 100644 vendor/github.com/vishvananda/netns/nshandle_others.go diff --git a/go.mod b/go.mod index 12066f04f92..1b5dc485b58 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( github.com/sirupsen/logrus v1.9.3 github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 github.com/urfave/cli v1.22.16 - github.com/vishvananda/netlink v1.1.0 + github.com/vishvananda/netlink v1.3.0 golang.org/x/net v0.24.0 golang.org/x/sys v0.22.0 google.golang.org/protobuf v1.35.1 @@ -34,6 +34,6 @@ require ( require ( github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect - github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df // indirect + github.com/vishvananda/netns v0.0.4 // indirect golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 // indirect ) diff --git a/go.sum b/go.sum index a5f51a63849..37f69162460 100644 --- a/go.sum +++ b/go.sum @@ -77,20 +77,21 @@ github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtse github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/urfave/cli v1.22.16 h1:MH0k6uJxdwdeWQTwhSO42Pwr4YLrNLwBtg1MRgTqPdQ= github.com/urfave/cli v1.22.16/go.mod h1:EeJR6BKodywf4zciqrdw6hpCPk68JO9z5LazXZMn5Po= -github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0= -github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= -github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7ZovXvuNyL3XQl8UFofeikI1NW1Gypu7k= -github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= +github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk= +github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs= +github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= +github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 h1:Jvc7gsqn21cJHCmAWx0LiimpP18LZmUxkT5Mp7EZ1mI= golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/vendor/github.com/vishvananda/netlink/.gitignore b/vendor/github.com/vishvananda/netlink/.gitignore index 9f11b755a17..66f8fb50272 100644 --- a/vendor/github.com/vishvananda/netlink/.gitignore +++ b/vendor/github.com/vishvananda/netlink/.gitignore @@ -1 +1,2 @@ .idea/ +.vscode/ diff --git a/vendor/github.com/vishvananda/netlink/.travis.yml b/vendor/github.com/vishvananda/netlink/.travis.yml deleted file mode 100644 index 7d14af4d6da..00000000000 --- a/vendor/github.com/vishvananda/netlink/.travis.yml +++ /dev/null @@ -1,19 +0,0 @@ -language: go -go: - - "1.10.x" - - "1.11.x" - - "1.12.x" -before_script: - # make sure we keep path in tact when we sudo - - sudo sed -i -e 's/^Defaults\tsecure_path.*$//' /etc/sudoers - # modprobe ip_gre or else the first gre device can't be deleted - - sudo modprobe ip_gre - # modprobe nf_conntrack for the conntrack testing - - sudo modprobe nf_conntrack - - sudo modprobe nf_conntrack_netlink - - sudo modprobe nf_conntrack_ipv4 - - sudo modprobe nf_conntrack_ipv6 - - sudo modprobe sch_hfsc -install: - - go get github.com/vishvananda/netns -go_import_path: github.com/vishvananda/netlink diff --git a/vendor/github.com/vishvananda/netlink/README.md b/vendor/github.com/vishvananda/netlink/README.md index a88e2f41840..0128bc67d55 100644 --- a/vendor/github.com/vishvananda/netlink/README.md +++ b/vendor/github.com/vishvananda/netlink/README.md @@ -1,6 +1,6 @@ # netlink - netlink library for go # -[![Build Status](https://travis-ci.org/vishvananda/netlink.png?branch=master)](https://travis-ci.org/vishvananda/netlink) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink) +![Build Status](https://github.com/vishvananda/netlink/actions/workflows/main.yml/badge.svg) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink) The netlink package provides a simple netlink library for go. Netlink is the interface a user-space program in linux uses to communicate with diff --git a/vendor/github.com/vishvananda/netlink/addr.go b/vendor/github.com/vishvananda/netlink/addr.go index f08c956969d..653f540dbc0 100644 --- a/vendor/github.com/vishvananda/netlink/addr.go +++ b/vendor/github.com/vishvananda/netlink/addr.go @@ -17,6 +17,7 @@ type Addr struct { Broadcast net.IP PreferedLft int ValidLft int + LinkIndex int } // String returns $ip/$netmask $label diff --git a/vendor/github.com/vishvananda/netlink/addr_linux.go b/vendor/github.com/vishvananda/netlink/addr_linux.go index 28746d5afec..218ab237965 100644 --- a/vendor/github.com/vishvananda/netlink/addr_linux.go +++ b/vendor/github.com/vishvananda/netlink/addr_linux.go @@ -11,9 +11,6 @@ import ( "golang.org/x/sys/unix" ) -// IFA_FLAGS is a u32 attribute. -const IFA_FLAGS = 0x8 - // AddrAdd will add an IP address to a link device. // // Equivalent to: `ip addr add $addr dev $link` @@ -77,17 +74,19 @@ func (h *Handle) AddrDel(link Link, addr *Addr) error { } func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error { - base := link.Attrs() - if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) { - return fmt.Errorf("label must begin with interface name") - } - h.ensureIndex(base) - family := nl.GetIPFamily(addr.IP) - msg := nl.NewIfAddrmsg(family) - msg.Index = uint32(base.Index) msg.Scope = uint8(addr.Scope) + if link == nil { + msg.Index = uint32(addr.LinkIndex) + } else { + base := link.Attrs() + if addr.Label != "" && !strings.HasPrefix(addr.Label, base.Name) { + return fmt.Errorf("label must begin with interface name") + } + h.ensureIndex(base) + msg.Index = uint32(base.Index) + } mask := addr.Mask if addr.Peer != nil { mask = addr.Peer.Mask @@ -125,7 +124,7 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error } else { b := make([]byte, 4) native.PutUint32(b, uint32(addr.Flags)) - flagsData := nl.NewRtAttr(IFA_FLAGS, b) + flagsData := nl.NewRtAttr(unix.IFA_FLAGS, b) req.AddData(flagsData) } } @@ -156,10 +155,10 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error // value should be "forever". To compensate for that, only add the attributes if at least one of the values is // non-zero, which means the caller has explicitly set them if addr.ValidLft > 0 || addr.PreferedLft > 0 { - cachedata := nl.IfaCacheInfo{ - IfaValid: uint32(addr.ValidLft), - IfaPrefered: uint32(addr.PreferedLft), - } + cachedata := nl.IfaCacheInfo{unix.IfaCacheinfo{ + Valid: uint32(addr.ValidLft), + Prefered: uint32(addr.PreferedLft), + }} req.AddData(nl.NewRtAttr(unix.IFA_CACHEINFO, cachedata.Serialize())) } @@ -179,7 +178,7 @@ func AddrList(link Link, family int) ([]Addr, error) { // The list can be filtered by link and ip family. func (h *Handle) AddrList(link Link, family int) ([]Addr, error) { req := h.newNetlinkRequest(unix.RTM_GETADDR, unix.NLM_F_DUMP) - msg := nl.NewIfInfomsg(family) + msg := nl.NewIfAddrmsg(family) req.AddData(msg) msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWADDR) @@ -196,12 +195,12 @@ func (h *Handle) AddrList(link Link, family int) ([]Addr, error) { var res []Addr for _, m := range msgs { - addr, msgFamily, ifindex, err := parseAddr(m) + addr, msgFamily, err := parseAddr(m) if err != nil { return res, err } - if link != nil && ifindex != indexFilter { + if link != nil && addr.LinkIndex != indexFilter { // Ignore messages from other interfaces continue } @@ -216,11 +215,11 @@ func (h *Handle) AddrList(link Link, family int) ([]Addr, error) { return res, nil } -func parseAddr(m []byte) (addr Addr, family, index int, err error) { +func parseAddr(m []byte) (addr Addr, family int, err error) { msg := nl.DeserializeIfAddrmsg(m) family = -1 - index = -1 + addr.LinkIndex = -1 attrs, err1 := nl.ParseRouteAttr(m[msg.Len():]) if err1 != nil { @@ -229,7 +228,7 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) { } family = int(msg.Family) - index = int(msg.Index) + addr.LinkIndex = int(msg.Index) var local, dst *net.IPNet for _, attr := range attrs { @@ -254,12 +253,12 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) { addr.Broadcast = attr.Value case unix.IFA_LABEL: addr.Label = string(attr.Value[:len(attr.Value)-1]) - case IFA_FLAGS: + case unix.IFA_FLAGS: addr.Flags = int(native.Uint32(attr.Value[0:4])) - case nl.IFA_CACHEINFO: + case unix.IFA_CACHEINFO: ci := nl.DeserializeIfaCacheInfo(attr.Value) - addr.PreferedLft = int(ci.IfaPrefered) - addr.ValidLft = int(ci.IfaValid) + addr.PreferedLft = int(ci.Prefered) + addr.ValidLft = int(ci.Valid) } } @@ -271,7 +270,7 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) { // But obviously, as there are IPv6 PtP addresses, too, // IFA_LOCAL should also be handled for IPv6. if local != nil { - if family == FAMILY_V4 && local.IP.Equal(dst.IP) { + if family == FAMILY_V4 && dst != nil && local.IP.Equal(dst.IP) { addr.IPNet = dst } else { addr.IPNet = local @@ -299,22 +298,24 @@ type AddrUpdate struct { // AddrSubscribe takes a chan down which notifications will be sent // when addresses change. Close the 'done' chan to stop subscription. func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0) + return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // AddrSubscribeAt works like AddrSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0) + return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // AddrSubscribeOptions contains a set of options to use with // AddrSubscribeWithOptions. type AddrSubscribeOptions struct { - Namespace *netns.NsHandle - ErrorCallback func(error) - ListExisting bool - ReceiveBufferSize int + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // AddrSubscribeWithOptions work like AddrSubscribe but enable to @@ -325,26 +326,33 @@ func AddrSubscribeWithOptions(ch chan<- AddrUpdate, done <-chan struct{}, option none := netns.None() options.Namespace = &none } - return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, options.ReceiveBufferSize) + return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, rcvbuf int) error { +func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvBufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_IFADDR, unix.RTNLGRP_IPV6_IFADDR) if err != nil { return err } - if done != nil { - go func() { - <-done - s.Close() - }() + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } } if rcvbuf != 0 { - err = pkgHandle.SetSocketReceiveBufferSize(rcvbuf, false) + err = s.SetReceiveBufferSize(rcvbuf, rcvBufForce) if err != nil { return err } } + if done != nil { + go func() { + <-done + s.Close() + }() + } if listExisting { req := pkgHandle.newNetlinkRequest(unix.RTM_GETADDR, unix.NLM_F_DUMP) @@ -360,7 +368,8 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c msgs, from, err := s.Receive() if err != nil { if cberr != nil { - cberr(err) + cberr(fmt.Errorf("Receive failed: %v", + err)) } return } @@ -375,7 +384,6 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c continue } if m.Header.Type == unix.NLMSG_ERROR { - native := nl.NativeEndian() error := int32(native.Uint32(m.Data[0:4])) if error == 0 { continue @@ -394,7 +402,7 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c continue } - addr, _, ifindex, err := parseAddr(m.Data) + addr, _, err := parseAddr(m.Data) if err != nil { if cberr != nil { cberr(fmt.Errorf("could not parse address: %v", err)) @@ -403,7 +411,7 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c } ch <- AddrUpdate{LinkAddress: *addr.IPNet, - LinkIndex: ifindex, + LinkIndex: addr.LinkIndex, NewAddr: msgType == unix.RTM_NEWADDR, Flags: addr.Flags, Scope: addr.Scope, diff --git a/vendor/github.com/vishvananda/netlink/bpf_linux.go b/vendor/github.com/vishvananda/netlink/bpf_linux.go index 6631626bfc0..96befbfe0ca 100644 --- a/vendor/github.com/vishvananda/netlink/bpf_linux.go +++ b/vendor/github.com/vishvananda/netlink/bpf_linux.go @@ -16,6 +16,30 @@ const ( BPF_PROG_TYPE_SCHED_ACT BPF_PROG_TYPE_TRACEPOINT BPF_PROG_TYPE_XDP + BPF_PROG_TYPE_PERF_EVENT + BPF_PROG_TYPE_CGROUP_SKB + BPF_PROG_TYPE_CGROUP_SOCK + BPF_PROG_TYPE_LWT_IN + BPF_PROG_TYPE_LWT_OUT + BPF_PROG_TYPE_LWT_XMIT + BPF_PROG_TYPE_SOCK_OPS + BPF_PROG_TYPE_SK_SKB + BPF_PROG_TYPE_CGROUP_DEVICE + BPF_PROG_TYPE_SK_MSG + BPF_PROG_TYPE_RAW_TRACEPOINT + BPF_PROG_TYPE_CGROUP_SOCK_ADDR + BPF_PROG_TYPE_LWT_SEG6LOCAL + BPF_PROG_TYPE_LIRC_MODE2 + BPF_PROG_TYPE_SK_REUSEPORT + BPF_PROG_TYPE_FLOW_DISSECTOR + BPF_PROG_TYPE_CGROUP_SYSCTL + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE + BPF_PROG_TYPE_CGROUP_SOCKOPT + BPF_PROG_TYPE_TRACING + BPF_PROG_TYPE_STRUCT_OPS + BPF_PROG_TYPE_EXT + BPF_PROG_TYPE_LSM + BPF_PROG_TYPE_SK_LOOKUP ) type BPFAttr struct { diff --git a/vendor/github.com/vishvananda/netlink/bridge_linux.go b/vendor/github.com/vishvananda/netlink/bridge_linux.go index 6e1224c47b8..6c340b0ce9a 100644 --- a/vendor/github.com/vishvananda/netlink/bridge_linux.go +++ b/vendor/github.com/vishvananda/netlink/bridge_linux.go @@ -63,7 +63,19 @@ func BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) err // BridgeVlanAdd adds a new vlan filter entry // Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` func (h *Handle) BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error { - return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, pvid, untagged, self, master) + return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, 0, pvid, untagged, self, master) +} + +// BridgeVlanAddRange adds a new vlan filter entry +// Equivalent to: `bridge vlan add dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func BridgeVlanAddRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return pkgHandle.BridgeVlanAddRange(link, vid, vidEnd, pvid, untagged, self, master) +} + +// BridgeVlanAddRange adds a new vlan filter entry +// Equivalent to: `bridge vlan add dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func (h *Handle) BridgeVlanAddRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, vidEnd, pvid, untagged, self, master) } // BridgeVlanDel adds a new vlan filter entry @@ -75,10 +87,22 @@ func BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) err // BridgeVlanDel adds a new vlan filter entry // Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` func (h *Handle) BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error { - return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, pvid, untagged, self, master) + return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, 0, pvid, untagged, self, master) } -func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged, self, master bool) error { +// BridgeVlanDelRange adds a new vlan filter entry +// Equivalent to: `bridge vlan del dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func BridgeVlanDelRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return pkgHandle.BridgeVlanDelRange(link, vid, vidEnd, pvid, untagged, self, master) +} + +// BridgeVlanDelRange adds a new vlan filter entry +// Equivalent to: `bridge vlan del dev DEV vid VID-VIDEND [ pvid ] [ untagged ] [ self ] [ master ]` +func (h *Handle) BridgeVlanDelRange(link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { + return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, vidEnd, pvid, untagged, self, master) +} + +func (h *Handle) bridgeVlanModify(cmd int, link Link, vid, vidEnd uint16, pvid, untagged, self, master bool) error { base := link.Attrs() h.ensureIndex(base) req := h.newNetlinkRequest(cmd, unix.NLM_F_ACK) @@ -105,7 +129,20 @@ func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged if untagged { vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_UNTAGGED } - br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + + if vidEnd != 0 { + vlanEndInfo := &nl.BridgeVlanInfo{Vid: vidEnd} + vlanEndInfo.Flags = vlanInfo.Flags + + vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_RANGE_BEGIN + br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + + vlanEndInfo.Flags |= nl.BRIDGE_VLAN_INFO_RANGE_END + br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanEndInfo.Serialize()) + } else { + br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + } + req.AddData(br) _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err diff --git a/vendor/github.com/vishvananda/netlink/chain.go b/vendor/github.com/vishvananda/netlink/chain.go new file mode 100644 index 00000000000..1d1c144e95a --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/chain.go @@ -0,0 +1,22 @@ +package netlink + +import ( + "fmt" +) + +// Chain contains the attributes of a Chain +type Chain struct { + Parent uint32 + Chain uint32 +} + +func (c Chain) String() string { + return fmt.Sprintf("{Parent: %d, Chain: %d}", c.Parent, c.Chain) +} + +func NewChain(parent uint32, chain uint32) Chain { + return Chain{ + Parent: parent, + Chain: chain, + } +} diff --git a/vendor/github.com/vishvananda/netlink/chain_linux.go b/vendor/github.com/vishvananda/netlink/chain_linux.go new file mode 100644 index 00000000000..d9f441613cc --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/chain_linux.go @@ -0,0 +1,112 @@ +package netlink + +import ( + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// ChainDel will delete a chain from the system. +func ChainDel(link Link, chain Chain) error { + // Equivalent to: `tc chain del $chain` + return pkgHandle.ChainDel(link, chain) +} + +// ChainDel will delete a chain from the system. +// Equivalent to: `tc chain del $chain` +func (h *Handle) ChainDel(link Link, chain Chain) error { + return h.chainModify(unix.RTM_DELCHAIN, 0, link, chain) +} + +// ChainAdd will add a chain to the system. +// Equivalent to: `tc chain add` +func ChainAdd(link Link, chain Chain) error { + return pkgHandle.ChainAdd(link, chain) +} + +// ChainAdd will add a chain to the system. +// Equivalent to: `tc chain add` +func (h *Handle) ChainAdd(link Link, chain Chain) error { + return h.chainModify( + unix.RTM_NEWCHAIN, + unix.NLM_F_CREATE|unix.NLM_F_EXCL, + link, + chain) +} + +func (h *Handle) chainModify(cmd, flags int, link Link, chain Chain) error { + req := h.newNetlinkRequest(cmd, flags|unix.NLM_F_ACK) + index := int32(0) + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + Parent: chain.Parent, + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_CHAIN, nl.Uint32Attr(chain.Chain))) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// ChainList gets a list of chains in the system. +// Equivalent to: `tc chain list`. +// The list can be filtered by link. +func ChainList(link Link, parent uint32) ([]Chain, error) { + return pkgHandle.ChainList(link, parent) +} + +// ChainList gets a list of chains in the system. +// Equivalent to: `tc chain list`. +// The list can be filtered by link. +func (h *Handle) ChainList(link Link, parent uint32) ([]Chain, error) { + req := h.newNetlinkRequest(unix.RTM_GETCHAIN, unix.NLM_F_DUMP) + index := int32(0) + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + Parent: parent, + } + req.AddData(msg) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWCHAIN) + if err != nil { + return nil, err + } + + var res []Chain + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + // skip chains from other interfaces + if link != nil && msg.Ifindex != index { + continue + } + + var chain Chain + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_CHAIN: + chain.Chain = native.Uint32(attr.Value) + chain.Parent = parent + } + } + res = append(res, chain) + } + + return res, nil +} diff --git a/vendor/github.com/vishvananda/netlink/class.go b/vendor/github.com/vishvananda/netlink/class.go index dcc22d9e97e..e686f674507 100644 --- a/vendor/github.com/vishvananda/netlink/class.go +++ b/vendor/github.com/vishvananda/netlink/class.go @@ -47,6 +47,7 @@ type ClassStatistics struct { Basic *GnetStatsBasic Queue *GnetStatsQueue RateEst *GnetStatsRateEst + BasicHw *GnetStatsBasic // Hardward statistics added in kernel 4.20 } // NewClassStatistics Construct a ClassStatistics struct which fields are all initialized by 0. @@ -55,6 +56,7 @@ func NewClassStatistics() *ClassStatistics { Basic: &GnetStatsBasic{}, Queue: &GnetStatsQueue{}, RateEst: &GnetStatsRateEst{}, + BasicHw: &GnetStatsBasic{}, } } @@ -132,7 +134,10 @@ func (class *GenericClass) Type() string { return class.ClassType } -// ServiceCurve is the way the HFSC curve are represented +// ServiceCurve is a nondecreasing function of some time unit, returning the amount of service +// (an allowed or allocated amount of bandwidth) at some specific point in time. The purpose of it +// should be subconsciously obvious: if a class was allowed to transfer not less than the amount +// specified by its service curve, then the service curve is not violated. type ServiceCurve struct { m1 uint32 d uint32 @@ -144,6 +149,21 @@ func (c *ServiceCurve) Attrs() (uint32, uint32, uint32) { return c.m1, c.d, c.m2 } +// Burst returns the burst rate (m1) of the curve +func (c *ServiceCurve) Burst() uint32 { + return c.m1 +} + +// Delay return the delay (d) of the curve +func (c *ServiceCurve) Delay() uint32 { + return c.d +} + +// Rate returns the rate (m2) of the curve +func (c *ServiceCurve) Rate() uint32 { + return c.m2 +} + // HfscClass is a representation of the HFSC class type HfscClass struct { ClassAttrs @@ -152,35 +172,44 @@ type HfscClass struct { Usc ServiceCurve } -// SetUsc sets the Usc curve +// SetUsc sets the USC curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. func (hfsc *HfscClass) SetUsc(m1 uint32, d uint32, m2 uint32) { - hfsc.Usc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.Usc = ServiceCurve{m1: m1, d: d, m2: m2} } -// SetFsc sets the Fsc curve +// SetFsc sets the Fsc curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. func (hfsc *HfscClass) SetFsc(m1 uint32, d uint32, m2 uint32) { - hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.Fsc = ServiceCurve{m1: m1, d: d, m2: m2} } -// SetRsc sets the Rsc curve +// SetRsc sets the Rsc curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. func (hfsc *HfscClass) SetRsc(m1 uint32, d uint32, m2 uint32) { - hfsc.Rsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.Rsc = ServiceCurve{m1: m1, d: d, m2: m2} } -// SetSC implements the SC from the tc CLI +// SetSC implements the SC from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. func (hfsc *HfscClass) SetSC(m1 uint32, d uint32, m2 uint32) { - hfsc.Rsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} - hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.SetRsc(m1, d, m2) + hfsc.SetFsc(m1, d, m2) } -// SetUL implements the UL from the tc CLI +// SetUL implements the UL from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. func (hfsc *HfscClass) SetUL(m1 uint32, d uint32, m2 uint32) { - hfsc.Usc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.SetUsc(m1, d, m2) } -// SetLS implements the LS from the tc CLI +// SetLS implements the LS from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. func (hfsc *HfscClass) SetLS(m1 uint32, d uint32, m2 uint32) { - hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8} + hfsc.SetFsc(m1, d, m2) } // NewHfscClass returns a new HFSC struct with the set parameters @@ -193,6 +222,7 @@ func NewHfscClass(attrs ClassAttrs) *HfscClass { } } +// String() returns a string that contains the information and attributes of the HFSC class func (hfsc *HfscClass) String() string { return fmt.Sprintf( "{%s -- {RSC: {m1=%d d=%d m2=%d}} {FSC: {m1=%d d=%d m2=%d}} {USC: {m1=%d d=%d m2=%d}}}", diff --git a/vendor/github.com/vishvananda/netlink/class_linux.go b/vendor/github.com/vishvananda/netlink/class_linux.go index 31091e5010a..a82eb09de24 100644 --- a/vendor/github.com/vishvananda/netlink/class_linux.go +++ b/vendor/github.com/vishvananda/netlink/class_linux.go @@ -43,12 +43,12 @@ func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass { if buffer == 0 { buffer = uint32(float64(rate)/Hz() + float64(mtu)) } - buffer = uint32(Xmittime(rate, buffer)) + buffer = Xmittime(rate, buffer) if cbuffer == 0 { cbuffer = uint32(float64(ceil)/Hz() + float64(mtu)) } - cbuffer = uint32(Xmittime(ceil, cbuffer)) + cbuffer = Xmittime(ceil, cbuffer) return &HtbClass{ ClassAttrs: attrs, @@ -56,9 +56,9 @@ func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass { Ceil: ceil, Buffer: buffer, Cbuffer: cbuffer, - Quantum: 10, Level: 0, - Prio: 0, + Prio: cattrs.Prio, + Quantum: cattrs.Quantum, } } @@ -176,12 +176,21 @@ func classPayload(req *nl.NetlinkRequest, class Class) error { options.AddRtAttr(nl.TCA_HTB_PARMS, opt.Serialize()) options.AddRtAttr(nl.TCA_HTB_RTAB, SerializeRtab(rtab)) options.AddRtAttr(nl.TCA_HTB_CTAB, SerializeRtab(ctab)) + if htb.Rate >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_HTB_RATE64, nl.Uint64Attr(htb.Rate)) + } + if htb.Ceil >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_HTB_CEIL64, nl.Uint64Attr(htb.Ceil)) + } case "hfsc": hfsc := class.(*HfscClass) opt := nl.HfscCopt{} - opt.Rsc.Set(hfsc.Rsc.Attrs()) - opt.Fsc.Set(hfsc.Fsc.Attrs()) - opt.Usc.Set(hfsc.Usc.Attrs()) + rm1, rd, rm2 := hfsc.Rsc.Attrs() + opt.Rsc.Set(rm1/8, rd, rm2/8) + fm1, fd, fm2 := hfsc.Fsc.Attrs() + opt.Fsc.Set(fm1/8, fd, fm2/8) + um1, ud, um2 := hfsc.Usc.Attrs() + opt.Usc.Set(um1/8, ud, um2/8) options.AddRtAttr(nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc)) options.AddRtAttr(nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc)) options.AddRtAttr(nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc)) @@ -303,6 +312,10 @@ func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, erro htb.Quantum = opt.Quantum htb.Level = opt.Level htb.Prio = opt.Prio + case nl.TCA_HTB_RATE64: + htb.Rate = native.Uint64(datum.Value[0:8]) + case nl.TCA_HTB_CEIL64: + htb.Ceil = native.Uint64(datum.Value[0:8]) } } return detailed, nil @@ -315,11 +328,11 @@ func parseHfscClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, err m1, d, m2 := nl.DeserializeHfscCurve(datum.Value).Attrs() switch datum.Attr.Type { case nl.TCA_HFSC_RSC: - hfsc.Rsc = ServiceCurve{m1: m1, d: d, m2: m2} + hfsc.Rsc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} case nl.TCA_HFSC_FSC: - hfsc.Fsc = ServiceCurve{m1: m1, d: d, m2: m2} + hfsc.Fsc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} case nl.TCA_HFSC_USC: - hfsc.Usc = ServiceCurve{m1: m1, d: d, m2: m2} + hfsc.Usc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} } } return detailed, nil @@ -328,7 +341,6 @@ func parseHfscClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, err func parseTcStats(data []byte) (*ClassStatistics, error) { buf := &bytes.Buffer{} buf.Write(data) - native := nl.NativeEndian() tcStats := &tcStats{} if err := binary.Read(buf, native, tcStats); err != nil { return nil, err @@ -350,7 +362,6 @@ func parseTcStats(data []byte) (*ClassStatistics, error) { func parseGnetStats(data []byte, gnetStats interface{}) error { buf := &bytes.Buffer{} buf.Write(data) - native := nl.NativeEndian() return binary.Read(buf, native, gnetStats) } @@ -377,6 +388,11 @@ func parseTcStats2(data []byte) (*ClassStatistics, error) { return nil, fmt.Errorf("Failed to parse ClassStatistics.RateEst with: %v\n%s", err, hex.Dump(datum.Value)) } + case nl.TCA_STATS_BASIC_HW: + if err := parseGnetStats(datum.Value, stats.BasicHw); err != nil { + return nil, fmt.Errorf("Failed to parse ClassStatistics.BasicHw with: %v\n%s", + err, hex.Dump(datum.Value)) + } } } diff --git a/vendor/github.com/vishvananda/netlink/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/conntrack_linux.go index 4bff0dcbab0..ba022453b3b 100644 --- a/vendor/github.com/vishvananda/netlink/conntrack_linux.go +++ b/vendor/github.com/vishvananda/netlink/conntrack_linux.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "net" + "time" "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" @@ -54,10 +55,30 @@ func ConntrackTableFlush(table ConntrackTableType) error { return pkgHandle.ConntrackTableFlush(table) } +// ConntrackCreate creates a new conntrack flow in the desired table +// conntrack -I [table] Create a conntrack or expectation +func ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + return pkgHandle.ConntrackCreate(table, family, flow) +} + +// ConntrackUpdate updates an existing conntrack flow in the desired table using the handle +// conntrack -U [table] Update a conntrack +func ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + return pkgHandle.ConntrackUpdate(table, family, flow) +} + // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter // conntrack -D [table] parameters Delete conntrack or expectation +// +// Deprecated: use [ConntrackDeleteFilter] instead. func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { - return pkgHandle.ConntrackDeleteFilter(table, family, filter) + return pkgHandle.ConntrackDeleteFilters(table, family, filter) +} + +// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters +// conntrack -D [table] parameters Delete conntrack or expectation +func ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) { + return pkgHandle.ConntrackDeleteFilters(table, family, filters...) } // ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed @@ -86,9 +107,51 @@ func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error { return err } +// ConntrackCreate creates a new conntrack flow in the desired table using the handle +// conntrack -I [table] Create a conntrack or expectation +func (h *Handle) ConntrackCreate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_CREATE) + attr, err := flow.toNlData() + if err != nil { + return err + } + + for _, a := range attr { + req.AddData(a) + } + + _, err = req.Execute(unix.NETLINK_NETFILTER, 0) + return err +} + +// ConntrackUpdate updates an existing conntrack flow in the desired table using the handle +// conntrack -U [table] Update a conntrack +func (h *Handle) ConntrackUpdate(table ConntrackTableType, family InetFamily, flow *ConntrackFlow) error { + req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_NEW, unix.NLM_F_ACK|unix.NLM_F_REPLACE) + attr, err := flow.toNlData() + if err != nil { + return err + } + + for _, a := range attr { + req.AddData(a) + } + + _, err = req.Execute(unix.NETLINK_NETFILTER, 0) + return err +} + // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed // conntrack -D [table] parameters Delete conntrack or expectation +// +// Deprecated: use [Handle.ConntrackDeleteFilters] instead. func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter CustomConntrackFilter) (uint, error) { + return h.ConntrackDeleteFilters(table, family, filter) +} + +// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters using the netlink handle passed +// conntrack -D [table] parameters Delete conntrack or expectation +func (h *Handle) ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) { res, err := h.dumpConntrackTable(table, family) if err != nil { return 0, err @@ -97,12 +160,16 @@ func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFami var matched uint for _, dataRaw := range res { flow := parseRawData(dataRaw) - if match := filter.MatchConntrackFlow(flow); match { - req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) - // skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already - req2.AddRawData(dataRaw[4:]) - req2.Execute(unix.NETLINK_NETFILTER, 0) - matched++ + for _, filter := range filters { + if match := filter.MatchConntrackFlow(flow); match { + req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) + // skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already + req2.AddRawData(dataRaw[4:]) + req2.Execute(unix.NETLINK_NETFILTER, 0) + matched++ + // flow is already deleted, no need to match on other filters and continue to the next flow. + break + } } } @@ -127,10 +194,44 @@ func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) return req.Execute(unix.NETLINK_NETFILTER, 0) } +// ProtoInfo wraps an L4-protocol structure - roughly corresponds to the +// __nfct_protoinfo union found in libnetfilter_conntrack/include/internal/object.h. +// Currently, only protocol names, and TCP state is supported. +type ProtoInfo interface { + Protocol() string +} + +// ProtoInfoTCP corresponds to the `tcp` struct of the __nfct_protoinfo union. +// Only TCP state is currently supported. +type ProtoInfoTCP struct { + State uint8 +} +// Protocol returns "tcp". +func (*ProtoInfoTCP) Protocol() string {return "tcp"} +func (p *ProtoInfoTCP) toNlData() ([]*nl.RtAttr, error) { + ctProtoInfo := nl.NewRtAttr(unix.NLA_F_NESTED | nl.CTA_PROTOINFO, []byte{}) + ctProtoInfoTCP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_PROTOINFO_TCP, []byte{}) + ctProtoInfoTCPState := nl.NewRtAttr(nl.CTA_PROTOINFO_TCP_STATE, nl.Uint8Attr(p.State)) + ctProtoInfoTCP.AddChild(ctProtoInfoTCPState) + ctProtoInfo.AddChild(ctProtoInfoTCP) + + return []*nl.RtAttr{ctProtoInfo}, nil +} + +// ProtoInfoSCTP only supports the protocol name. +type ProtoInfoSCTP struct {} +// Protocol returns "sctp". +func (*ProtoInfoSCTP) Protocol() string {return "sctp"} + +// ProtoInfoDCCP only supports the protocol name. +type ProtoInfoDCCP struct {} +// Protocol returns "dccp". +func (*ProtoInfoDCCP) Protocol() string {return "dccp"} + // The full conntrack flow structure is very complicated and can be found in the file: // http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h // For the time being, the structure below allows to parse and extract the base information of a flow -type ipTuple struct { +type IPTuple struct { Bytes uint64 DstIP net.IP DstPort uint16 @@ -140,21 +241,150 @@ type ipTuple struct { SrcPort uint16 } +// toNlData generates the inner fields of a nested tuple netlink datastructure +// does not generate the "nested"-flagged outer message. +func (t *IPTuple) toNlData(family uint8) ([]*nl.RtAttr, error) { + + var srcIPsFlag, dstIPsFlag int + if family == nl.FAMILY_V4 { + srcIPsFlag = nl.CTA_IP_V4_SRC + dstIPsFlag = nl.CTA_IP_V4_DST + } else if family == nl.FAMILY_V6 { + srcIPsFlag = nl.CTA_IP_V6_SRC + dstIPsFlag = nl.CTA_IP_V6_DST + } else { + return []*nl.RtAttr{}, fmt.Errorf("couldn't generate netlink message for tuple due to unrecognized FamilyType '%d'", family) + } + + ctTupleIP := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_IP, nil) + ctTupleIPSrc := nl.NewRtAttr(srcIPsFlag, t.SrcIP) + ctTupleIP.AddChild(ctTupleIPSrc) + ctTupleIPDst := nl.NewRtAttr(dstIPsFlag, t.DstIP) + ctTupleIP.AddChild(ctTupleIPDst) + + ctTupleProto := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_PROTO, nil) + ctTupleProtoNum := nl.NewRtAttr(nl.CTA_PROTO_NUM, []byte{t.Protocol}) + ctTupleProto.AddChild(ctTupleProtoNum) + ctTupleProtoSrcPort := nl.NewRtAttr(nl.CTA_PROTO_SRC_PORT, nl.BEUint16Attr(t.SrcPort)) + ctTupleProto.AddChild(ctTupleProtoSrcPort) + ctTupleProtoDstPort := nl.NewRtAttr(nl.CTA_PROTO_DST_PORT, nl.BEUint16Attr(t.DstPort)) + ctTupleProto.AddChild(ctTupleProtoDstPort, ) + + return []*nl.RtAttr{ctTupleIP, ctTupleProto}, nil +} + type ConntrackFlow struct { FamilyType uint8 - Forward ipTuple - Reverse ipTuple + Forward IPTuple + Reverse IPTuple Mark uint32 + Zone uint16 + TimeStart uint64 + TimeStop uint64 + TimeOut uint32 + Labels []byte + ProtoInfo ProtoInfo } func (s *ConntrackFlow) String() string { // conntrack cmd output: - // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 - return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=%d", + // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 labels=0x00000000050012ac4202010000000000 zone=100 + // start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec) + start := time.Unix(0, int64(s.TimeStart)) + stop := time.Unix(0, int64(s.TimeStop)) + timeout := int32(s.TimeOut) + res := fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x ", nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol, s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes, s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes, s.Mark) + if len(s.Labels) > 0 { + res += fmt.Sprintf("labels=0x%x ", s.Labels) + } + if s.Zone != 0 { + res += fmt.Sprintf("zone=%d ", s.Zone) + } + res += fmt.Sprintf("start=%v stop=%v timeout=%d(sec)", start, stop, timeout) + return res +} + +// toNlData generates netlink messages representing the flow. +func (s *ConntrackFlow) toNlData() ([]*nl.RtAttr, error) { + var payload []*nl.RtAttr + // The message structure is built as follows: + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + + // CTA_TUPLE_ORIG + ctTupleOrig := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_ORIG, nil) + forwardFlowAttrs, err := s.Forward.toNlData(s.FamilyType) + if err != nil { + return nil, fmt.Errorf("couldn't generate netlink data for conntrack forward flow: %w", err) + } + for _, a := range forwardFlowAttrs { + ctTupleOrig.AddChild(a) + } + + // CTA_TUPLE_REPLY + ctTupleReply := nl.NewRtAttr(unix.NLA_F_NESTED|nl.CTA_TUPLE_REPLY, nil) + reverseFlowAttrs, err := s.Reverse.toNlData(s.FamilyType) + if err != nil { + return nil, fmt.Errorf("couldn't generate netlink data for conntrack reverse flow: %w", err) + } + for _, a := range reverseFlowAttrs { + ctTupleReply.AddChild(a) + } + + ctMark := nl.NewRtAttr(nl.CTA_MARK, nl.BEUint32Attr(s.Mark)) + ctTimeout := nl.NewRtAttr(nl.CTA_TIMEOUT, nl.BEUint32Attr(s.TimeOut)) + + payload = append(payload, ctTupleOrig, ctTupleReply, ctMark, ctTimeout) + + if s.ProtoInfo != nil { + switch p := s.ProtoInfo.(type) { + case *ProtoInfoTCP: + attrs, err := p.toNlData() + if err != nil { + return nil, fmt.Errorf("couldn't generate netlink data for conntrack flow's TCP protoinfo: %w", err) + } + payload = append(payload, attrs...) + default: + return nil, errors.New("couldn't generate netlink data for conntrack: field 'ProtoInfo' only supports TCP or nil") + } + } + + return payload, nil } // This method parse the ip tuple structure @@ -164,7 +394,7 @@ func (s *ConntrackFlow) String() string { // // // -func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 { +func parseIpTuple(reader *bytes.Reader, tpl *IPTuple) uint8 { for i := 0; i < 2; i++ { _, t, _, v := parseNfAttrTLV(reader) switch t { @@ -174,25 +404,43 @@ func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 { tpl.DstIP = v } } - // Skip the next 4 bytes nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO - reader.Seek(4, seekCurrent) - _, t, _, v := parseNfAttrTLV(reader) + // Get total length of nested protocol-specific info. + _, _, protoInfoTotalLen := parseNfAttrTL(reader) + _, t, l, v := parseNfAttrTLV(reader) + // Track the number of bytes read. + protoInfoBytesRead := uint16(nl.SizeofNfattr) + l if t == nl.CTA_PROTO_NUM { tpl.Protocol = uint8(v[0]) } - // Skip some padding 3 bytes + // We only parse TCP & UDP headers. Skip the others. + if tpl.Protocol != unix.IPPROTO_TCP && tpl.Protocol != unix.IPPROTO_UDP { + // skip the rest + bytesRemaining := protoInfoTotalLen - protoInfoBytesRead + reader.Seek(int64(bytesRemaining), seekCurrent) + return tpl.Protocol + } + // Skip 3 bytes of padding reader.Seek(3, seekCurrent) + protoInfoBytesRead += 3 for i := 0; i < 2; i++ { _, t, _ := parseNfAttrTL(reader) + protoInfoBytesRead += uint16(nl.SizeofNfattr) switch t { case nl.CTA_PROTO_SRC_PORT: parseBERaw16(reader, &tpl.SrcPort) + protoInfoBytesRead += 2 case nl.CTA_PROTO_DST_PORT: parseBERaw16(reader, &tpl.DstPort) + protoInfoBytesRead += 2 } - // Skip some padding 2 byte + // Skip 2 bytes of padding reader.Seek(2, seekCurrent) + protoInfoBytesRead += 2 } + // Skip any remaining/unknown parts of the message + bytesRemaining := protoInfoTotalLen - protoInfoBytesRead + reader.Seek(int64(bytesRemaining), seekCurrent) + return tpl.Protocol } @@ -211,10 +459,18 @@ func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) { binary.Read(r, nl.NativeEndian(), &attrType) isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED attrType = attrType & (nl.NLA_F_NESTED - 1) - return isNested, attrType, len } +// skipNfAttrValue seeks `r` past attr of length `len`. +// Maintains buffer alignment. +// Returns length of the seek performed. +func skipNfAttrValue(r *bytes.Reader, len uint16) uint16 { + len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1) + r.Seek(int64(len), seekCurrent) + return len +} + func parseBERaw16(r *bytes.Reader, v *uint16) { binary.Read(r, binary.BigEndian, v) } @@ -227,6 +483,10 @@ func parseBERaw64(r *bytes.Reader, v *uint64) { binary.Read(r, binary.BigEndian, v) } +func parseRaw32(r *bytes.Reader, v *uint32) { + binary.Read(r, nl.NativeEndian(), v) +} + func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) { for i := 0; i < 2; i++ { switch _, t, _ := parseNfAttrTL(r); t { @@ -241,11 +501,107 @@ func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) { return } +// when the flow is alive, only the timestamp_start is returned in structure +func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) { + var numTimeStamps int + oneItem := nl.SizeofNfattr + 8 // 4 bytes attr header + 8 bytes timestamp + if readSize == uint16(oneItem) { + numTimeStamps = 1 + } else if readSize == 2*uint16(oneItem) { + numTimeStamps = 2 + } else { + return + } + for i := 0; i < numTimeStamps; i++ { + switch _, t, _ := parseNfAttrTL(r); t { + case nl.CTA_TIMESTAMP_START: + parseBERaw64(r, &tstart) + case nl.CTA_TIMESTAMP_STOP: + parseBERaw64(r, &tstop) + default: + return + } + } + return + +} + +func parseProtoInfoTCPState(r *bytes.Reader) (s uint8) { + binary.Read(r, binary.BigEndian, &s) + r.Seek(nl.SizeofNfattr - 1, seekCurrent) + return s +} + +// parseProtoInfoTCP reads the entire nested protoinfo structure, but only parses the state attr. +func parseProtoInfoTCP(r *bytes.Reader, attrLen uint16) (*ProtoInfoTCP) { + p := new(ProtoInfoTCP) + bytesRead := 0 + for bytesRead < int(attrLen) { + _, t, l := parseNfAttrTL(r) + bytesRead += nl.SizeofNfattr + + switch t { + case nl.CTA_PROTOINFO_TCP_STATE: + p.State = parseProtoInfoTCPState(r) + bytesRead += nl.SizeofNfattr + default: + bytesRead += int(skipNfAttrValue(r, l)) + } + } + + return p +} + +func parseProtoInfo(r *bytes.Reader, attrLen uint16) (p ProtoInfo) { + bytesRead := 0 + for bytesRead < int(attrLen) { + _, t, l := parseNfAttrTL(r) + bytesRead += nl.SizeofNfattr + + switch t { + case nl.CTA_PROTOINFO_TCP: + p = parseProtoInfoTCP(r, l) + bytesRead += int(l) + // No inner fields of DCCP / SCTP currently supported. + case nl.CTA_PROTOINFO_DCCP: + p = new(ProtoInfoDCCP) + skipped := skipNfAttrValue(r, l) + bytesRead += int(skipped) + case nl.CTA_PROTOINFO_SCTP: + p = new(ProtoInfoSCTP) + skipped := skipNfAttrValue(r, l) + bytesRead += int(skipped) + default: + skipped := skipNfAttrValue(r, l) + bytesRead += int(skipped) + } + } + + return p +} + +func parseTimeOut(r *bytes.Reader) (ttimeout uint32) { + parseBERaw32(r, &ttimeout) + return +} + func parseConnectionMark(r *bytes.Reader) (mark uint32) { parseBERaw32(r, &mark) return } +func parseConnectionLabels(r *bytes.Reader) (label []byte) { + label = make([]byte, 16) // netfilter defines 128 bit labels value + binary.Read(r, nl.NativeEndian(), &label) + return +} + +func parseConnectionZone(r *bytes.Reader) (zone uint16) { + parseBERaw16(r, &zone) + r.Seek(2, seekCurrent) + return +} + func parseRawData(data []byte) *ConntrackFlow { s := &ConntrackFlow{} // First there is the Nfgenmsg header @@ -266,25 +622,41 @@ func parseRawData(data []byte) *ConntrackFlow { if nested, t, l := parseNfAttrTL(reader); nested { switch t { case nl.CTA_TUPLE_ORIG: - if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { + if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { parseIpTuple(reader, &s.Forward) } case nl.CTA_TUPLE_REPLY: - if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { + if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { parseIpTuple(reader, &s.Reverse) } else { // Header not recognized skip it - reader.Seek(int64(l), seekCurrent) + skipNfAttrValue(reader, l) } case nl.CTA_COUNTERS_ORIG: s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader) case nl.CTA_COUNTERS_REPLY: s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader) + case nl.CTA_TIMESTAMP: + s.TimeStart, s.TimeStop = parseTimeStamp(reader, l) + case nl.CTA_PROTOINFO: + s.ProtoInfo = parseProtoInfo(reader, l) + default: + skipNfAttrValue(reader, l) } } else { switch t { case nl.CTA_MARK: s.Mark = parseConnectionMark(reader) + case nl.CTA_LABELS: + s.Labels = parseConnectionLabels(reader) + case nl.CTA_TIMEOUT: + s.TimeOut = parseTimeOut(reader) + case nl.CTA_ID, nl.CTA_STATUS, nl.CTA_USE: + skipNfAttrValue(reader, l) + case nl.CTA_ZONE: + s.Zone = parseConnectionZone(reader) + default: + skipNfAttrValue(reader, l) } } } @@ -318,18 +690,27 @@ func parseRawData(data []byte) *ConntrackFlow { // --mask-src ip Source mask address // --mask-dst ip Destination mask address +// Layer 4 Protocol common parameters and options: +// TCP, UDP, SCTP, UDPLite and DCCP +// --sport, --orig-port-src port Source port in original direction +// --dport, --orig-port-dst port Destination port in original direction + // Filter types type ConntrackFilterType uint8 const ( - ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction - ConntrackOrigDstIP // -orig-dst ip Destination address from original direction - ConntrackReplySrcIP // --reply-src ip Reply Source IP - ConntrackReplyDstIP // --reply-dst ip Reply Destination IP - ConntrackReplyAnyIP // Match source or destination reply IP - ConntrackNatSrcIP = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP - ConntrackNatDstIP = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP - ConntrackNatAnyIP = ConntrackReplyAnyIP // deprecated use instaed ConntrackReplyAnyIP + ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction + ConntrackOrigDstIP // -orig-dst ip Destination address from original direction + ConntrackReplySrcIP // --reply-src ip Reply Source IP + ConntrackReplyDstIP // --reply-dst ip Reply Destination IP + ConntrackReplyAnyIP // Match source or destination reply IP + ConntrackOrigSrcPort // --orig-port-src port Source port in original direction + ConntrackOrigDstPort // --orig-port-dst port Destination port in original direction + ConntrackMatchLabels // --label label1,label2 Labels used in entry + ConntrackUnmatchLabels // --label label1,label2 Labels not used in entry + ConntrackNatSrcIP = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP + ConntrackNatDstIP = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP + ConntrackNatAnyIP = ConntrackReplyAnyIP // deprecated use instead ConntrackReplyAnyIP ) type CustomConntrackFilter interface { @@ -339,53 +720,180 @@ type CustomConntrackFilter interface { } type ConntrackFilter struct { - ipFilter map[ConntrackFilterType]net.IP + ipNetFilter map[ConntrackFilterType]*net.IPNet + portFilter map[ConntrackFilterType]uint16 + protoFilter uint8 + labelFilter map[ConntrackFilterType][][]byte + zoneFilter *uint16 +} + +// AddIPNet adds a IP subnet to the conntrack filter +func (f *ConntrackFilter) AddIPNet(tp ConntrackFilterType, ipNet *net.IPNet) error { + if ipNet == nil { + return fmt.Errorf("Filter attribute empty") + } + if f.ipNetFilter == nil { + f.ipNetFilter = make(map[ConntrackFilterType]*net.IPNet) + } + if _, ok := f.ipNetFilter[tp]; ok { + return errors.New("Filter attribute already present") + } + f.ipNetFilter[tp] = ipNet + return nil } // AddIP adds an IP to the conntrack filter func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error { - if f.ipFilter == nil { - f.ipFilter = make(map[ConntrackFilterType]net.IP) + if ip == nil { + return fmt.Errorf("Filter attribute empty") + } + return f.AddIPNet(tp, NewIPNet(ip)) +} + +// AddPort adds a Port to the conntrack filter if the Layer 4 protocol allows it +func (f *ConntrackFilter) AddPort(tp ConntrackFilterType, port uint16) error { + switch f.protoFilter { + // TCP, UDP, DCCP, SCTP, UDPLite + case 6, 17, 33, 132, 136: + default: + return fmt.Errorf("Filter attribute not available without a valid Layer 4 protocol: %d", f.protoFilter) } - if _, ok := f.ipFilter[tp]; ok { + + if f.portFilter == nil { + f.portFilter = make(map[ConntrackFilterType]uint16) + } + if _, ok := f.portFilter[tp]; ok { return errors.New("Filter attribute already present") } - f.ipFilter[tp] = ip + f.portFilter[tp] = port + return nil +} + +// AddProtocol adds the Layer 4 protocol to the conntrack filter +func (f *ConntrackFilter) AddProtocol(proto uint8) error { + if f.protoFilter != 0 { + return errors.New("Filter attribute already present") + } + f.protoFilter = proto + return nil +} + +// AddLabels adds the provided list (zero or more) of labels to the conntrack filter +// ConntrackFilterType here can be either: +// 1. ConntrackMatchLabels: This matches every flow that has a label value (len(flow.Labels) > 0) +// against the list of provided labels. If `flow.Labels` contains ALL the provided labels +// it is considered a match. This can be used when you want to match flows that contain +// one or more labels. +// 2. ConntrackUnmatchLabels: This matches every flow that has a label value (len(flow.Labels) > 0) +// against the list of provided labels. If `flow.Labels` does NOT contain ALL the provided labels +// it is considered a match. This can be used when you want to match flows that don't contain +// one or more labels. +func (f *ConntrackFilter) AddLabels(tp ConntrackFilterType, labels [][]byte) error { + if len(labels) == 0 { + return errors.New("Invalid length for provided labels") + } + if f.labelFilter == nil { + f.labelFilter = make(map[ConntrackFilterType][][]byte) + } + if _, ok := f.labelFilter[tp]; ok { + return errors.New("Filter attribute already present") + } + f.labelFilter[tp] = labels + return nil +} + +// AddZone adds a zone to the conntrack filter +func (f *ConntrackFilter) AddZone(zone uint16) error { + if f.zoneFilter != nil { + return errors.New("Filter attribute already present") + } + f.zoneFilter = &zone return nil } // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter // false otherwise func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool { - if len(f.ipFilter) == 0 { + if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 && len(f.labelFilter) == 0 && f.zoneFilter == nil { // empty filter always not match return false } - match := true - // -orig-src ip Source address from original direction - if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found { - match = match && elem.Equal(flow.Forward.SrcIP) + // -p, --protonum proto Layer 4 Protocol, eg. 'tcp' + if f.protoFilter != 0 && flow.Forward.Protocol != f.protoFilter { + // different Layer 4 protocol always not match + return false } - // -orig-dst ip Destination address from original direction - if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found { - match = match && elem.Equal(flow.Forward.DstIP) + // Conntrack zone filter + if f.zoneFilter != nil && *f.zoneFilter != flow.Zone { + return false } - // -src-nat ip Source NAT ip - if elem, found := f.ipFilter[ConntrackReplySrcIP]; match && found { - match = match && elem.Equal(flow.Reverse.SrcIP) + match := true + + // IP conntrack filter + if len(f.ipNetFilter) > 0 { + // -orig-src ip Source address from original direction + if elem, found := f.ipNetFilter[ConntrackOrigSrcIP]; found { + match = match && elem.Contains(flow.Forward.SrcIP) + } + + // -orig-dst ip Destination address from original direction + if elem, found := f.ipNetFilter[ConntrackOrigDstIP]; match && found { + match = match && elem.Contains(flow.Forward.DstIP) + } + + // -src-nat ip Source NAT ip + if elem, found := f.ipNetFilter[ConntrackReplySrcIP]; match && found { + match = match && elem.Contains(flow.Reverse.SrcIP) + } + + // -dst-nat ip Destination NAT ip + if elem, found := f.ipNetFilter[ConntrackReplyDstIP]; match && found { + match = match && elem.Contains(flow.Reverse.DstIP) + } + + // Match source or destination reply IP + if elem, found := f.ipNetFilter[ConntrackReplyAnyIP]; match && found { + match = match && (elem.Contains(flow.Reverse.SrcIP) || elem.Contains(flow.Reverse.DstIP)) + } } - // -dst-nat ip Destination NAT ip - if elem, found := f.ipFilter[ConntrackReplyDstIP]; match && found { - match = match && elem.Equal(flow.Reverse.DstIP) + // Layer 4 Port filter + if len(f.portFilter) > 0 { + // -orig-port-src port Source port from original direction + if elem, found := f.portFilter[ConntrackOrigSrcPort]; match && found { + match = match && elem == flow.Forward.SrcPort + } + + // -orig-port-dst port Destination port from original direction + if elem, found := f.portFilter[ConntrackOrigDstPort]; match && found { + match = match && elem == flow.Forward.DstPort + } } - // Match source or destination reply IP - if elem, found := f.ipFilter[ConntrackReplyAnyIP]; match && found { - match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP)) + // Label filter + if len(f.labelFilter) > 0 { + if len(flow.Labels) > 0 { + // --label label1,label2 in conn entry; + // every label passed should be contained in flow.Labels for a match to be true + if elem, found := f.labelFilter[ConntrackMatchLabels]; match && found { + for _, label := range elem { + match = match && (bytes.Contains(flow.Labels, label)) + } + } + // --label label1,label2 in conn entry; + // every label passed should be not contained in flow.Labels for a match to be true + if elem, found := f.labelFilter[ConntrackUnmatchLabels]; match && found { + for _, label := range elem { + match = match && !(bytes.Contains(flow.Labels, label)) + } + } + } else { + // flow doesn't contain labels, so it doesn't contain or notContain any provided matches + match = false + } } return match diff --git a/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go b/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go index af7af799e77..0bfdf422d1e 100644 --- a/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/conntrack_unspecified.go @@ -11,6 +11,9 @@ type InetFamily uint8 // ConntrackFlow placeholder type ConntrackFlow struct{} +// CustomConntrackFilter placeholder +type CustomConntrackFilter struct{} + // ConntrackFilter placeholder type ConntrackFilter struct{} @@ -29,10 +32,18 @@ func ConntrackTableFlush(table ConntrackTableType) error { // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter // conntrack -D [table] parameters Delete conntrack or expectation +// +// Deprecated: use [ConntrackDeleteFilter] instead. func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) { return 0, ErrNotImplemented } +// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters +// conntrack -D [table] parameters Delete conntrack or expectation +func ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) { + return 0, ErrNotImplemented +} + // ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed // conntrack -L [table] [options] List conntrack or expectation table func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) { @@ -48,6 +59,14 @@ func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error { // ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed // conntrack -D [table] parameters Delete conntrack or expectation +// +// Deprecated: use [Handle.ConntrackDeleteFilters] instead. func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) { return 0, ErrNotImplemented } + +// ConntrackDeleteFilters deletes entries on the specified table matching any of the specified filters using the netlink handle passed +// conntrack -D [table] parameters Delete conntrack or expectation +func (h *Handle) ConntrackDeleteFilters(table ConntrackTableType, family InetFamily, filters ...CustomConntrackFilter) (uint, error) { + return 0, ErrNotImplemented +} diff --git a/vendor/github.com/vishvananda/netlink/devlink_linux.go b/vendor/github.com/vishvananda/netlink/devlink_linux.go index 29b3f8ec1d2..d98801dbbe5 100644 --- a/vendor/github.com/vishvananda/netlink/devlink_linux.go +++ b/vendor/github.com/vishvananda/netlink/devlink_linux.go @@ -1,9 +1,11 @@ package netlink import ( + "fmt" + "net" + "strings" "syscall" - "fmt" "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" ) @@ -27,6 +29,325 @@ type DevlinkDevice struct { Attrs DevlinkDevAttrs } +// DevlinkPortFn represents port function and its attributes +type DevlinkPortFn struct { + HwAddr net.HardwareAddr + State uint8 + OpState uint8 +} + +// DevlinkPortFnSetAttrs represents attributes to set +type DevlinkPortFnSetAttrs struct { + FnAttrs DevlinkPortFn + HwAddrValid bool + StateValid bool +} + +// DevlinkPort represents port and its attributes +type DevlinkPort struct { + BusName string + DeviceName string + PortIndex uint32 + PortType uint16 + NetdeviceName string + NetdevIfIndex uint32 + RdmaDeviceName string + PortFlavour uint16 + Fn *DevlinkPortFn +} + +type DevLinkPortAddAttrs struct { + Controller uint32 + SfNumber uint32 + PortIndex uint32 + PfNumber uint16 + SfNumberValid bool + PortIndexValid bool + ControllerValid bool +} + +// DevlinkDeviceInfo represents devlink info +type DevlinkDeviceInfo struct { + Driver string + SerialNumber string + BoardID string + FwApp string + FwAppBoundleID string + FwAppName string + FwBoundleID string + FwMgmt string + FwMgmtAPI string + FwMgmtBuild string + FwNetlist string + FwNetlistBuild string + FwPsidAPI string + FwUndi string +} + +// DevlinkResource represents a device resource +type DevlinkResource struct { + Name string + ID uint64 + Size uint64 + SizeNew uint64 + SizeMin uint64 + SizeMax uint64 + SizeGranularity uint64 + PendingChange bool + Unit uint8 + SizeValid bool + OCCValid bool + OCCSize uint64 + Parent *DevlinkResource + Children []DevlinkResource +} + +// parseAttributes parses provided Netlink Attributes and populates DevlinkResource, returns error if occured +func (dlr *DevlinkResource) parseAttributes(attrs map[uint16]syscall.NetlinkRouteAttr) error { + var attr syscall.NetlinkRouteAttr + var ok bool + + // mandatory attributes + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_ID] + if !ok { + return fmt.Errorf("missing resource id") + } + dlr.ID = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_NAME] + if !ok { + return fmt.Errorf("missing resource name") + } + dlr.Name = nl.BytesToString(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE] + if !ok { + return fmt.Errorf("missing resource size") + } + dlr.Size = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_GRAN] + if !ok { + return fmt.Errorf("missing resource size granularity") + } + dlr.SizeGranularity = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_UNIT] + if !ok { + return fmt.Errorf("missing resource unit") + } + dlr.Unit = uint8(attr.Value[0]) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_MIN] + if !ok { + return fmt.Errorf("missing resource size min") + } + dlr.SizeMin = native.Uint64(attr.Value) + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_MAX] + if !ok { + return fmt.Errorf("missing resource size max") + } + dlr.SizeMax = native.Uint64(attr.Value) + + // optional attributes + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_OCC] + if ok { + dlr.OCCSize = native.Uint64(attr.Value) + dlr.OCCValid = true + } + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_VALID] + if ok { + dlr.SizeValid = uint8(attr.Value[0]) != 0 + } + + dlr.SizeNew = dlr.Size + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_SIZE_NEW] + if ok { + dlr.SizeNew = native.Uint64(attr.Value) + } + + dlr.PendingChange = dlr.Size != dlr.SizeNew + + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_LIST] + if ok { + // handle nested resoruces recursively + subResources, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + + for _, subresource := range subResources { + resource := DevlinkResource{Parent: dlr} + attrs, err := nl.ParseRouteAttrAsMap(subresource.Value) + if err != nil { + return err + } + err = resource.parseAttributes(attrs) + if err != nil { + return fmt.Errorf("failed to parse child resource, parent:%s. %w", dlr.Name, err) + } + dlr.Children = append(dlr.Children, resource) + } + } + return nil +} + +// DevlinkResources represents all devlink resources of a devlink device +type DevlinkResources struct { + Bus string + Device string + Resources []DevlinkResource +} + +// parseAttributes parses provided Netlink Attributes and populates DevlinkResources, returns error if occured +func (dlrs *DevlinkResources) parseAttributes(attrs map[uint16]syscall.NetlinkRouteAttr) error { + var attr syscall.NetlinkRouteAttr + var ok bool + + // Bus + attr, ok = attrs[nl.DEVLINK_ATTR_BUS_NAME] + if !ok { + return fmt.Errorf("missing bus name") + } + dlrs.Bus = nl.BytesToString(attr.Value) + + // Device + attr, ok = attrs[nl.DEVLINK_ATTR_DEV_NAME] + if !ok { + return fmt.Errorf("missing device name") + } + dlrs.Device = nl.BytesToString(attr.Value) + + // Resource List + attr, ok = attrs[nl.DEVLINK_ATTR_RESOURCE_LIST] + if !ok { + return fmt.Errorf("missing resource list") + } + + resourceAttrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + + for _, resourceAttr := range resourceAttrs { + resource := DevlinkResource{} + attrs, err := nl.ParseRouteAttrAsMap(resourceAttr.Value) + if err != nil { + return err + } + err = resource.parseAttributes(attrs) + if err != nil { + return fmt.Errorf("failed to parse root resoruces, %w", err) + } + dlrs.Resources = append(dlrs.Resources, resource) + } + + return nil +} + +// DevlinkParam represents parameter of the device +type DevlinkParam struct { + Name string + IsGeneric bool + Type uint8 // possible values are in nl.DEVLINK_PARAM_TYPE_* constants + Values []DevlinkParamValue +} + +// DevlinkParamValue contains values of the parameter +// Data field contains specific type which can be casted by unsing info from the DevlinkParam.Type field +type DevlinkParamValue struct { + rawData []byte + Data interface{} + CMODE uint8 // possible values are in nl.DEVLINK_PARAM_CMODE_* constants +} + +// parseAttributes parses provided Netlink Attributes and populates DevlinkParam, returns error if occured +func (dlp *DevlinkParam) parseAttributes(attrs []syscall.NetlinkRouteAttr) error { + var valuesList [][]syscall.NetlinkRouteAttr + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.DEVLINK_ATTR_PARAM: + nattrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + for _, nattr := range nattrs { + switch nattr.Attr.Type { + case nl.DEVLINK_ATTR_PARAM_NAME: + dlp.Name = nl.BytesToString(nattr.Value) + case nl.DEVLINK_ATTR_PARAM_GENERIC: + dlp.IsGeneric = true + case nl.DEVLINK_ATTR_PARAM_TYPE: + if len(nattr.Value) == 1 { + dlp.Type = nattr.Value[0] + } + case nl.DEVLINK_ATTR_PARAM_VALUES_LIST: + nnattrs, err := nl.ParseRouteAttr(nattr.Value) + if err != nil { + return err + } + valuesList = append(valuesList, nnattrs) + } + } + } + } + for _, valAttr := range valuesList { + v := DevlinkParamValue{} + if err := v.parseAttributes(valAttr, dlp.Type); err != nil { + return err + } + dlp.Values = append(dlp.Values, v) + } + return nil +} + +func (dlpv *DevlinkParamValue) parseAttributes(attrs []syscall.NetlinkRouteAttr, paramType uint8) error { + for _, attr := range attrs { + nattrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return err + } + var rawData []byte + for _, nattr := range nattrs { + switch nattr.Attr.Type { + case nl.DEVLINK_ATTR_PARAM_VALUE_DATA: + rawData = nattr.Value + case nl.DEVLINK_ATTR_PARAM_VALUE_CMODE: + if len(nattr.Value) == 1 { + dlpv.CMODE = nattr.Value[0] + } + } + } + switch paramType { + case nl.DEVLINK_PARAM_TYPE_U8: + dlpv.Data = uint8(0) + if rawData != nil && len(rawData) == 1 { + dlpv.Data = uint8(rawData[0]) + } + case nl.DEVLINK_PARAM_TYPE_U16: + dlpv.Data = uint16(0) + if rawData != nil { + dlpv.Data = native.Uint16(rawData) + } + case nl.DEVLINK_PARAM_TYPE_U32: + dlpv.Data = uint32(0) + if rawData != nil { + dlpv.Data = native.Uint32(rawData) + } + case nl.DEVLINK_PARAM_TYPE_STRING: + dlpv.Data = "" + if rawData != nil { + dlpv.Data = nl.BytesToString(rawData) + } + case nl.DEVLINK_PARAM_TYPE_BOOL: + dlpv.Data = rawData != nil + } + } + return nil +} + func parseDevLinkDeviceList(msgs [][]byte) ([]*DevlinkDevice, error) { devices := make([]*DevlinkDevice, 0, len(msgs)) for _, m := range msgs { @@ -95,9 +416,9 @@ func (d *DevlinkDevice) parseAttributes(attrs []syscall.NetlinkRouteAttr) error for _, a := range attrs { switch a.Attr.Type { case nl.DEVLINK_ATTR_BUS_NAME: - d.BusName = string(a.Value) + d.BusName = string(a.Value[:len(a.Value)-1]) case nl.DEVLINK_ATTR_DEV_NAME: - d.DeviceName = string(a.Value) + d.DeviceName = string(a.Value[:len(a.Value)-1]) case nl.DEVLINK_ATTR_ESWITCH_MODE: d.Attrs.Eswitch.Mode = parseEswitchMode(native.Uint16(a.Value)) case nl.DEVLINK_ATTR_ESWITCH_INLINE_MODE: @@ -126,12 +447,12 @@ func (h *Handle) getEswitchAttrs(family *GenlFamily, dev *DevlinkDevice) { req := h.newNetlinkRequest(int(family.ID), unix.NLM_F_REQUEST|unix.NLM_F_ACK) req.AddData(msg) - b := make([]byte, len(dev.BusName)) + b := make([]byte, len(dev.BusName)+1) copy(b, dev.BusName) data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b) req.AddData(data) - b = make([]byte, len(dev.DeviceName)) + b = make([]byte, len(dev.DeviceName)+1) copy(b, dev.DeviceName) data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b) req.AddData(data) @@ -270,3 +591,569 @@ func (h *Handle) DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error func DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error { return pkgHandle.DevLinkSetEswitchMode(Dev, NewMode) } + +func (port *DevlinkPort) parseAttributes(attrs []syscall.NetlinkRouteAttr) error { + for _, a := range attrs { + switch a.Attr.Type { + case nl.DEVLINK_ATTR_BUS_NAME: + port.BusName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_DEV_NAME: + port.DeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_INDEX: + port.PortIndex = native.Uint32(a.Value) + case nl.DEVLINK_ATTR_PORT_TYPE: + port.PortType = native.Uint16(a.Value) + case nl.DEVLINK_ATTR_PORT_NETDEV_NAME: + port.NetdeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_NETDEV_IFINDEX: + port.NetdevIfIndex = native.Uint32(a.Value) + case nl.DEVLINK_ATTR_PORT_IBDEV_NAME: + port.RdmaDeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_FLAVOUR: + port.PortFlavour = native.Uint16(a.Value) + case nl.DEVLINK_ATTR_PORT_FUNCTION: + port.Fn = &DevlinkPortFn{} + for nested := range nl.ParseAttributes(a.Value) { + switch nested.Type { + case nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR: + port.Fn.HwAddr = nested.Value[:] + case nl.DEVLINK_PORT_FN_ATTR_STATE: + port.Fn.State = uint8(nested.Value[0]) + case nl.DEVLINK_PORT_FN_ATTR_OPSTATE: + port.Fn.OpState = uint8(nested.Value[0]) + } + } + } + } + return nil +} + +func parseDevLinkAllPortList(msgs [][]byte) ([]*DevlinkPort, error) { + ports := make([]*DevlinkPort, 0, len(msgs)) + for _, m := range msgs { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + port := &DevlinkPort{} + if err = port.parseAttributes(attrs); err != nil { + return nil, err + } + ports = append(ports, port) + } + return ports, nil +} + +// DevLinkGetPortList provides a pointer to devlink ports and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetAllPortList() ([]*DevlinkPort, error) { + f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME) + if err != nil { + return nil, err + } + msg := &nl.Genlmsg{ + Command: nl.DEVLINK_CMD_PORT_GET, + Version: nl.GENL_DEVLINK_VERSION, + } + req := h.newNetlinkRequest(int(f.ID), + unix.NLM_F_REQUEST|unix.NLM_F_ACK|unix.NLM_F_DUMP) + req.AddData(msg) + msgs, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + ports, err := parseDevLinkAllPortList(msgs) + if err != nil { + return nil, err + } + return ports, nil +} + +// DevLinkGetPortList provides a pointer to devlink ports and nil error, +// otherwise returns an error code. +func DevLinkGetAllPortList() ([]*DevlinkPort, error) { + return pkgHandle.DevLinkGetAllPortList() +} + +func parseDevlinkPortMsg(msgs [][]byte) (*DevlinkPort, error) { + m := msgs[0] + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + port := &DevlinkPort{} + if err = port.parseAttributes(attrs); err != nil { + return nil, err + } + return port, nil +} + +// DevLinkGetPortByIndexprovides a pointer to devlink device and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) { + + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_GET, Bus, Device) + if err != nil { + return nil, err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + port, err := parseDevlinkPortMsg(respmsg) + return port, err +} + +// DevlinkGetDeviceResources returns devlink device resources +func DevlinkGetDeviceResources(bus string, device string) (*DevlinkResources, error) { + return pkgHandle.DevlinkGetDeviceResources(bus, device) +} + +// DevlinkGetDeviceResources returns devlink device resources +func (h *Handle) DevlinkGetDeviceResources(bus string, device string) (*DevlinkResources, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_RESOURCE_DUMP, bus, device) + if err != nil { + return nil, err + } + + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + + var resources DevlinkResources + for _, m := range respmsg { + attrs, err := nl.ParseRouteAttrAsMap(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + resources.parseAttributes(attrs) + } + + return &resources, nil +} + +// DevlinkGetDeviceParams returns parameters for devlink device +// Equivalent to: `devlink dev param show /` +func (h *Handle) DevlinkGetDeviceParams(bus string, device string) ([]*DevlinkParam, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_GET, bus, device) + if err != nil { + return nil, err + } + req.Flags |= unix.NLM_F_DUMP + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + var params []*DevlinkParam + for _, m := range respmsg { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + p := &DevlinkParam{} + if err := p.parseAttributes(attrs); err != nil { + return nil, err + } + params = append(params, p) + } + + return params, nil +} + +// DevlinkGetDeviceParams returns parameters for devlink device +// Equivalent to: `devlink dev param show /` +func DevlinkGetDeviceParams(bus string, device string) ([]*DevlinkParam, error) { + return pkgHandle.DevlinkGetDeviceParams(bus, device) +} + +// DevlinkGetDeviceParamByName returns specific parameter for devlink device +// Equivalent to: `devlink dev param show / name ` +func (h *Handle) DevlinkGetDeviceParamByName(bus string, device string, param string) (*DevlinkParam, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_GET, bus, device) + if err != nil { + return nil, err + } + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_NAME, nl.ZeroTerminated(param))) + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + if len(respmsg) == 0 { + return nil, fmt.Errorf("unexpected response") + } + attrs, err := nl.ParseRouteAttr(respmsg[0][nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + p := &DevlinkParam{} + if err := p.parseAttributes(attrs); err != nil { + return nil, err + } + return p, nil +} + +// DevlinkGetDeviceParamByName returns specific parameter for devlink device +// Equivalent to: `devlink dev param show / name ` +func DevlinkGetDeviceParamByName(bus string, device string, param string) (*DevlinkParam, error) { + return pkgHandle.DevlinkGetDeviceParamByName(bus, device, param) +} + +// DevlinkSetDeviceParam set specific parameter for devlink device +// Equivalent to: `devlink dev param set / name cmode value ` +// cmode argument should contain valid cmode value as uint8, modes are define in nl.DEVLINK_PARAM_CMODE_* constants +// value argument should have one of the following types: uint8, uint16, uint32, string, bool +func (h *Handle) DevlinkSetDeviceParam(bus string, device string, param string, cmode uint8, value interface{}) error { + // retrive the param type + p, err := h.DevlinkGetDeviceParamByName(bus, device, param) + if err != nil { + return fmt.Errorf("failed to get device param: %v", err) + } + paramType := p.Type + + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PARAM_SET, bus, device) + if err != nil { + return err + } + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_TYPE, nl.Uint8Attr(paramType))) + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_NAME, nl.ZeroTerminated(param))) + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_VALUE_CMODE, nl.Uint8Attr(cmode))) + + var valueAsBytes []byte + switch paramType { + case nl.DEVLINK_PARAM_TYPE_U8: + v, ok := value.(uint8) + if !ok { + return fmt.Errorf("unepected value type required: uint8, actual: %T", value) + } + valueAsBytes = nl.Uint8Attr(v) + case nl.DEVLINK_PARAM_TYPE_U16: + v, ok := value.(uint16) + if !ok { + return fmt.Errorf("unepected value type required: uint16, actual: %T", value) + } + valueAsBytes = nl.Uint16Attr(v) + case nl.DEVLINK_PARAM_TYPE_U32: + v, ok := value.(uint32) + if !ok { + return fmt.Errorf("unepected value type required: uint32, actual: %T", value) + } + valueAsBytes = nl.Uint32Attr(v) + case nl.DEVLINK_PARAM_TYPE_STRING: + v, ok := value.(string) + if !ok { + return fmt.Errorf("unepected value type required: string, actual: %T", value) + } + valueAsBytes = nl.ZeroTerminated(v) + case nl.DEVLINK_PARAM_TYPE_BOOL: + v, ok := value.(bool) + if !ok { + return fmt.Errorf("unepected value type required: bool, actual: %T", value) + } + if v { + valueAsBytes = []byte{} + } + default: + return fmt.Errorf("unsupported parameter type: %d", paramType) + } + if valueAsBytes != nil { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PARAM_VALUE_DATA, valueAsBytes)) + } + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevlinkSetDeviceParam set specific parameter for devlink device +// Equivalent to: `devlink dev param set / name cmode value ` +// cmode argument should contain valid cmode value as uint8, modes are define in nl.DEVLINK_PARAM_CMODE_* constants +// value argument should have one of the following types: uint8, uint16, uint32, string, bool +func DevlinkSetDeviceParam(bus string, device string, param string, cmode uint8, value interface{}) error { + return pkgHandle.DevlinkSetDeviceParam(bus, device, param, cmode, value) +} + +// DevLinkGetPortByIndex provides a pointer to devlink portand nil error, +// otherwise returns an error code. +func DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) { + return pkgHandle.DevLinkGetPortByIndex(Bus, Device, PortIndex) +} + +// DevLinkPortAdd adds a devlink port and returns a port on success +// otherwise returns nil port and an error code. +func (h *Handle) DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_NEW, Bus, Device) + if err != nil { + return nil, err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FLAVOUR, nl.Uint16Attr(Flavour))) + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_PF_NUMBER, nl.Uint16Attr(Attrs.PfNumber))) + if Flavour == nl.DEVLINK_PORT_FLAVOUR_PCI_SF && Attrs.SfNumberValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_SF_NUMBER, nl.Uint32Attr(Attrs.SfNumber))) + } + if Attrs.PortIndexValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(Attrs.PortIndex))) + } + if Attrs.ControllerValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, nl.Uint32Attr(Attrs.Controller))) + } + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + port, err := parseDevlinkPortMsg(respmsg) + return port, err +} + +// DevLinkPortAdd adds a devlink port and returns a port on success +// otherwise returns nil port and an error code. +func DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) { + return pkgHandle.DevLinkPortAdd(Bus, Device, Flavour, Attrs) +} + +// DevLinkPortDel deletes a devlink port and returns success or error code. +func (h *Handle) DevLinkPortDel(Bus string, Device string, PortIndex uint32) error { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_DEL, Bus, Device) + if err != nil { + return err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevLinkPortDel deletes a devlink port and returns success or error code. +func DevLinkPortDel(Bus string, Device string, PortIndex uint32) error { + return pkgHandle.DevLinkPortDel(Bus, Device, PortIndex) +} + +// DevlinkPortFnSet sets one or more port function attributes specified by the attribute mask. +// It returns 0 on success or error code. +func (h *Handle) DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_SET, Bus, Device) + if err != nil { + return err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + + fnAttr := nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FUNCTION|unix.NLA_F_NESTED, nil) + + if FnAttrs.HwAddrValid { + fnAttr.AddRtAttr(nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, []byte(FnAttrs.FnAttrs.HwAddr)) + } + + if FnAttrs.StateValid { + fnAttr.AddRtAttr(nl.DEVLINK_PORT_FN_ATTR_STATE, nl.Uint8Attr(FnAttrs.FnAttrs.State)) + } + req.AddData(fnAttr) + + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevlinkPortFnSet sets one or more port function attributes specified by the attribute mask. +// It returns 0 on success or error code. +func DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error { + return pkgHandle.DevlinkPortFnSet(Bus, Device, PortIndex, FnAttrs) +} + +// devlinkInfoGetter is function that is responsible for getting devlink info message +// this is introduced for test purpose +type devlinkInfoGetter func(bus, device string) ([]byte, error) + +// DevlinkGetDeviceInfoByName returns devlink info for selected device, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func (h *Handle) DevlinkGetDeviceInfoByName(Bus string, Device string, getInfoMsg devlinkInfoGetter) (*DevlinkDeviceInfo, error) { + info, err := h.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, getInfoMsg) + if err != nil { + return nil, err + } + + return parseInfoData(info), nil +} + +// DevlinkGetDeviceInfoByName returns devlink info for selected device, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func DevlinkGetDeviceInfoByName(Bus string, Device string) (*DevlinkDeviceInfo, error) { + return pkgHandle.DevlinkGetDeviceInfoByName(Bus, Device, pkgHandle.getDevlinkInfoMsg) +} + +// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func (h *Handle) DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string, getInfoMsg devlinkInfoGetter) (map[string]string, error) { + response, err := getInfoMsg(Bus, Device) + if err != nil { + return nil, err + } + + info, err := parseInfoMsg(response) + if err != nil { + return nil, err + } + + return info, nil +} + +// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string) (map[string]string, error) { + return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, pkgHandle.getDevlinkInfoMsg) +} + +// GetDevlinkInfo returns devlink info for target device, +// otherwise returns an error code. +func (d *DevlinkDevice) GetDevlinkInfo() (*DevlinkDeviceInfo, error) { + return pkgHandle.DevlinkGetDeviceInfoByName(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg) +} + +// GetDevlinkInfoAsMap returns devlink info for target device as a map, +// otherwise returns an error code. +func (d *DevlinkDevice) GetDevlinkInfoAsMap() (map[string]string, error) { + return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg) +} + +func (h *Handle) getDevlinkInfoMsg(bus, device string) ([]byte, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_INFO_GET, bus, device) + if err != nil { + return nil, err + } + + response, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + + if len(response) < 1 { + return nil, fmt.Errorf("getDevlinkInfoMsg: message too short") + } + + return response[0], nil +} + +func parseInfoMsg(msg []byte) (map[string]string, error) { + if len(msg) < nl.SizeofGenlmsg { + return nil, fmt.Errorf("parseInfoMsg: message too short") + } + + info := make(map[string]string) + err := collectInfoData(msg[nl.SizeofGenlmsg:], info) + + if err != nil { + return nil, err + } + + return info, nil +} + +func collectInfoData(msg []byte, data map[string]string) error { + attrs, err := nl.ParseRouteAttr(msg) + if err != nil { + return err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.DEVLINK_ATTR_INFO_DRIVER_NAME: + data["driver"] = parseInfoValue(attr.Value) + case nl.DEVLINK_ATTR_INFO_SERIAL_NUMBER: + data["serialNumber"] = parseInfoValue(attr.Value) + case nl.DEVLINK_ATTR_INFO_VERSION_RUNNING, nl.DEVLINK_ATTR_INFO_VERSION_FIXED, + nl.DEVLINK_ATTR_INFO_VERSION_STORED: + key, value, err := getNestedInfoData(attr.Value) + if err != nil { + return err + } + data[key] = value + } + } + + if len(data) == 0 { + return fmt.Errorf("collectInfoData: could not read attributes") + } + + return nil +} + +func getNestedInfoData(msg []byte) (string, string, error) { + nestedAttrs, err := nl.ParseRouteAttr(msg) + + var key, value string + + if err != nil { + return "", "", err + } + + if len(nestedAttrs) != 2 { + return "", "", fmt.Errorf("getNestedInfoData: too few attributes in nested structure") + } + + for _, nestedAttr := range nestedAttrs { + switch nestedAttr.Attr.Type { + case nl.DEVLINK_ATTR_INFO_VERSION_NAME: + key = parseInfoValue(nestedAttr.Value) + case nl.DEVLINK_ATTR_INFO_VERSION_VALUE: + value = parseInfoValue(nestedAttr.Value) + } + } + + if key == "" { + return "", "", fmt.Errorf("getNestedInfoData: key not found") + } + + if value == "" { + return "", "", fmt.Errorf("getNestedInfoData: value not found") + } + + return key, value, nil +} + +func parseInfoData(data map[string]string) *DevlinkDeviceInfo { + info := new(DevlinkDeviceInfo) + for key, value := range data { + switch key { + case "driver": + info.Driver = value + case "serialNumber": + info.SerialNumber = value + case "board.id": + info.BoardID = value + case "fw.app": + info.FwApp = value + case "fw.app.bundle_id": + info.FwAppBoundleID = value + case "fw.app.name": + info.FwAppName = value + case "fw.bundle_id": + info.FwBoundleID = value + case "fw.mgmt": + info.FwMgmt = value + case "fw.mgmt.api": + info.FwMgmtAPI = value + case "fw.mgmt.build": + info.FwMgmtBuild = value + case "fw.netlist": + info.FwNetlist = value + case "fw.netlist.build": + info.FwNetlistBuild = value + case "fw.psid.api": + info.FwPsidAPI = value + case "fw.undi": + info.FwUndi = value + } + } + return info +} + +func parseInfoValue(value []byte) string { + v := strings.ReplaceAll(string(value), "\x00", "") + return strings.TrimSpace(v) +} diff --git a/vendor/github.com/vishvananda/netlink/filter.go b/vendor/github.com/vishvananda/netlink/filter.go index 88792eab0ee..84e1ca7a49b 100644 --- a/vendor/github.com/vishvananda/netlink/filter.go +++ b/vendor/github.com/vishvananda/netlink/filter.go @@ -19,6 +19,7 @@ type FilterAttrs struct { Parent uint32 Priority uint16 // lower is higher priority Protocol uint16 // unix.ETH_P_* + Chain *uint32 } func (q FilterAttrs) String() string { @@ -27,6 +28,11 @@ func (q FilterAttrs) String() string { type TcAct int32 +const ( + TC_ACT_EXT_SHIFT = 28 + TC_ACT_EXT_VAL_MASK = (1 << TC_ACT_EXT_SHIFT) - 1 +) + const ( TC_ACT_UNSPEC TcAct = -1 TC_ACT_OK TcAct = 0 @@ -40,6 +46,22 @@ const ( TC_ACT_JUMP TcAct = 0x10000000 ) +func getTcActExt(local int32) int32 { + return local << TC_ACT_EXT_SHIFT +} + +func getTcActGotoChain() TcAct { + return TcAct(getTcActExt(2)) +} + +func getTcActExtOpcode(combined int32) int32 { + return combined & (^TC_ACT_EXT_VAL_MASK) +} + +func TcActExtCmp(combined int32, opcode int32) bool { + return getTcActExtOpcode(combined) == opcode +} + func (a TcAct) String() string { switch a { case TC_ACT_UNSPEC: @@ -63,6 +85,9 @@ func (a TcAct) String() string { case TC_ACT_JUMP: return "jump" } + if TcActExtCmp(int32(a), int32(getTcActGotoChain())) { + return "goto" + } return fmt.Sprintf("0x%x", int32(a)) } @@ -93,17 +118,32 @@ func (a TcPolAct) String() string { } type ActionAttrs struct { - Index int - Capab int - Action TcAct - Refcnt int - Bindcnt int + Index int + Capab int + Action TcAct + Refcnt int + Bindcnt int + Statistics *ActionStatistic + Timestamp *ActionTimestamp } func (q ActionAttrs) String() string { return fmt.Sprintf("{Index: %d, Capab: %x, Action: %s, Refcnt: %d, Bindcnt: %d}", q.Index, q.Capab, q.Action.String(), q.Refcnt, q.Bindcnt) } +type ActionTimestamp struct { + Installed uint64 + LastUsed uint64 + Expires uint64 + FirstUsed uint64 +} + +func (t ActionTimestamp) String() string { + return fmt.Sprintf("Installed %d LastUsed %d Expires %d FirstUsed %d", t.Installed, t.LastUsed, t.Expires, t.FirstUsed) +} + +type ActionStatistic ClassStatistics + // Action represents an action in any supported filter. type Action interface { Attrs() *ActionAttrs @@ -112,6 +152,7 @@ type Action interface { type GenericAction struct { ActionAttrs + Chain int32 } func (action *GenericAction) Type() string { @@ -157,6 +198,39 @@ func NewConnmarkAction() *ConnmarkAction { } } +type CsumUpdateFlags uint32 + +const ( + TCA_CSUM_UPDATE_FLAG_IPV4HDR CsumUpdateFlags = 1 + TCA_CSUM_UPDATE_FLAG_ICMP CsumUpdateFlags = 2 + TCA_CSUM_UPDATE_FLAG_IGMP CsumUpdateFlags = 4 + TCA_CSUM_UPDATE_FLAG_TCP CsumUpdateFlags = 8 + TCA_CSUM_UPDATE_FLAG_UDP CsumUpdateFlags = 16 + TCA_CSUM_UPDATE_FLAG_UDPLITE CsumUpdateFlags = 32 + TCA_CSUM_UPDATE_FLAG_SCTP CsumUpdateFlags = 64 +) + +type CsumAction struct { + ActionAttrs + UpdateFlags CsumUpdateFlags +} + +func (action *CsumAction) Type() string { + return "csum" +} + +func (action *CsumAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewCsumAction() *CsumAction { + return &CsumAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} + type MirredAct uint8 func (a MirredAct) String() string { @@ -213,10 +287,11 @@ const ( type TunnelKeyAction struct { ActionAttrs - Action TunnelKeyAct - SrcAddr net.IP - DstAddr net.IP - KeyID uint32 + Action TunnelKeyAct + SrcAddr net.IP + DstAddr net.IP + KeyID uint32 + DestPort uint16 } func (action *TunnelKeyAction) Type() string { @@ -241,6 +316,7 @@ type SkbEditAction struct { PType *uint16 Priority *uint32 Mark *uint32 + Mask *uint32 } func (action *SkbEditAction) Type() string { @@ -259,6 +335,40 @@ func NewSkbEditAction() *SkbEditAction { } } +type PoliceAction struct { + ActionAttrs + Rate uint32 // in byte per second + Burst uint32 // in byte + RCellLog int + Mtu uint32 + Mpu uint16 // in byte + PeakRate uint32 // in byte per second + PCellLog int + AvRate uint32 // in byte per second + Overhead uint16 + LinkLayer int + ExceedAction TcPolAct + NotExceedAction TcPolAct +} + +func (action *PoliceAction) Type() string { + return "police" +} + +func (action *PoliceAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewPoliceAction() *PoliceAction { + return &PoliceAction{ + RCellLog: -1, + PCellLog: -1, + LinkLayer: 1, // ETHERNET + ExceedAction: TC_POLICE_RECLASSIFY, + NotExceedAction: TC_POLICE_OK, + } +} + // MatchAll filters match all packets type MatchAll struct { FilterAttrs @@ -274,20 +384,21 @@ func (filter *MatchAll) Type() string { return "matchall" } -type FilterFwAttrs struct { - ClassId uint32 - InDev string - Mask uint32 - Index uint32 - Buffer uint32 - Mtu uint32 - Mpu uint16 - Rate uint32 - AvRate uint32 - PeakRate uint32 - Action TcPolAct - Overhead uint16 - LinkLayer int +type FwFilter struct { + FilterAttrs + ClassId uint32 + InDev string + Mask uint32 + Police *PoliceAction + Actions []Action +} + +func (filter *FwFilter) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *FwFilter) Type() string { + return "fw" } type BpfFilter struct { @@ -322,3 +433,30 @@ func (filter *GenericFilter) Attrs() *FilterAttrs { func (filter *GenericFilter) Type() string { return filter.FilterType } + +type PeditAction struct { + ActionAttrs + Proto uint8 + SrcMacAddr net.HardwareAddr + DstMacAddr net.HardwareAddr + SrcIP net.IP + DstIP net.IP + SrcPort uint16 + DstPort uint16 +} + +func (p *PeditAction) Attrs() *ActionAttrs { + return &p.ActionAttrs +} + +func (p *PeditAction) Type() string { + return "pedit" +} + +func NewPeditAction() *PeditAction { + return &PeditAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} diff --git a/vendor/github.com/vishvananda/netlink/filter_linux.go b/vendor/github.com/vishvananda/netlink/filter_linux.go index c56f314cd3f..87cd18f8e41 100644 --- a/vendor/github.com/vishvananda/netlink/filter_linux.go +++ b/vendor/github.com/vishvananda/netlink/filter_linux.go @@ -37,9 +37,11 @@ type U32 struct { ClassId uint32 Divisor uint32 // Divisor MUST be power of 2. Hash uint32 + Link uint32 RedirIndex int Sel *TcU32Sel Actions []Action + Police *PoliceAction } func (filter *U32) Attrs() *FilterAttrs { @@ -50,74 +52,185 @@ func (filter *U32) Type() string { return "u32" } -// Fw filter filters on firewall marks -// NOTE: this is in filter_linux because it refers to nl.TcPolice which -// is defined in nl/tc_linux.go -type Fw struct { +type Flower struct { FilterAttrs - ClassId uint32 - // TODO remove nl type from interface - Police nl.TcPolice - InDev string - // TODO Action - Mask uint32 - AvRate uint32 - Rtab [256]uint32 - Ptab [256]uint32 -} - -func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) { - var rtab [256]uint32 - var ptab [256]uint32 - rcellLog := -1 - pcellLog := -1 - avrate := fattrs.AvRate / 8 - police := nl.TcPolice{} - police.Rate.Rate = fattrs.Rate / 8 - police.PeakRate.Rate = fattrs.PeakRate / 8 - buffer := fattrs.Buffer - linklayer := nl.LINKLAYER_ETHERNET + DestIP net.IP + DestIPMask net.IPMask + SrcIP net.IP + SrcIPMask net.IPMask + EthType uint16 + EncDestIP net.IP + EncDestIPMask net.IPMask + EncSrcIP net.IP + EncSrcIPMask net.IPMask + EncDestPort uint16 + EncKeyId uint32 + SkipHw bool + SkipSw bool + IPProto *nl.IPProto + DestPort uint16 + SrcPort uint16 - if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC { - linklayer = fattrs.LinkLayer - } + Actions []Action +} - police.Action = int32(fattrs.Action) - if police.Rate.Rate != 0 { - police.Rate.Mpu = fattrs.Mpu - police.Rate.Overhead = fattrs.Overhead - if CalcRtable(&police.Rate, rtab[:], rcellLog, fattrs.Mtu, linklayer) < 0 { - return nil, errors.New("TBF: failed to calculate rate table") - } - police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer))) +func (filter *Flower) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *Flower) Type() string { + return "flower" +} + +func (filter *Flower) encodeIP(parent *nl.RtAttr, ip net.IP, mask net.IPMask, v4Type, v6Type int, v4MaskType, v6MaskType int) { + ipType := v4Type + maskType := v4MaskType + + encodeMask := mask + if mask == nil { + encodeMask = net.CIDRMask(32, 32) } - police.Mtu = fattrs.Mtu - if police.PeakRate.Rate != 0 { - police.PeakRate.Mpu = fattrs.Mpu - police.PeakRate.Overhead = fattrs.Overhead - if CalcRtable(&police.PeakRate, ptab[:], pcellLog, fattrs.Mtu, linklayer) < 0 { - return nil, errors.New("POLICE: failed to calculate peak rate table") + v4IP := ip.To4() + if v4IP == nil { + ipType = v6Type + maskType = v6MaskType + if mask == nil { + encodeMask = net.CIDRMask(128, 128) } + } else { + ip = v4IP } - return &Fw{ - FilterAttrs: attrs, - ClassId: fattrs.ClassId, - InDev: fattrs.InDev, - Mask: fattrs.Mask, - Police: police, - AvRate: avrate, - Rtab: rtab, - Ptab: ptab, - }, nil + parent.AddRtAttr(ipType, ip) + parent.AddRtAttr(maskType, encodeMask) } -func (filter *Fw) Attrs() *FilterAttrs { - return &filter.FilterAttrs +func (filter *Flower) encode(parent *nl.RtAttr) error { + if filter.EthType != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ETH_TYPE, htons(filter.EthType)) + } + if filter.SrcIP != nil { + filter.encodeIP(parent, filter.SrcIP, filter.SrcIPMask, + nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC, + nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK) + } + if filter.DestIP != nil { + filter.encodeIP(parent, filter.DestIP, filter.DestIPMask, + nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST, + nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK) + } + if filter.EncSrcIP != nil { + filter.encodeIP(parent, filter.EncSrcIP, filter.EncSrcIPMask, + nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC, + nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK) + } + if filter.EncDestIP != nil { + filter.encodeIP(parent, filter.EncDestIP, filter.EncSrcIPMask, + nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST, + nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK) + } + if filter.EncDestPort != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT, htons(filter.EncDestPort)) + } + if filter.EncKeyId != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_KEY_ID, htonl(filter.EncKeyId)) + } + if filter.IPProto != nil { + ipproto := *filter.IPProto + parent.AddRtAttr(nl.TCA_FLOWER_KEY_IP_PROTO, ipproto.Serialize()) + if filter.SrcPort != 0 { + switch ipproto { + case nl.IPPROTO_TCP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_TCP_SRC, htons(filter.SrcPort)) + case nl.IPPROTO_UDP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_UDP_SRC, htons(filter.SrcPort)) + case nl.IPPROTO_SCTP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_SCTP_SRC, htons(filter.SrcPort)) + } + } + if filter.DestPort != 0 { + switch ipproto { + case nl.IPPROTO_TCP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_TCP_DST, htons(filter.DestPort)) + case nl.IPPROTO_UDP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_UDP_DST, htons(filter.DestPort)) + case nl.IPPROTO_SCTP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_SCTP_DST, htons(filter.DestPort)) + } + } + } + + var flags uint32 = 0 + if filter.SkipHw { + flags |= nl.TCA_CLS_FLAGS_SKIP_HW + } + if filter.SkipSw { + flags |= nl.TCA_CLS_FLAGS_SKIP_SW + } + parent.AddRtAttr(nl.TCA_FLOWER_FLAGS, htonl(flags)) + + actionsAttr := parent.AddRtAttr(nl.TCA_FLOWER_ACT, nil) + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } + return nil } -func (filter *Fw) Type() string { - return "fw" +func (filter *Flower) decode(data []syscall.NetlinkRouteAttr) error { + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_FLOWER_KEY_ETH_TYPE: + filter.EthType = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC: + filter.SrcIP = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK: + filter.SrcIPMask = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST: + filter.DestIP = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK: + filter.DestIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC: + filter.EncSrcIP = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK: + filter.EncSrcIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST: + filter.EncDestIP = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK: + filter.EncDestIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT: + filter.EncDestPort = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_ENC_KEY_ID: + filter.EncKeyId = ntohl(datum.Value) + case nl.TCA_FLOWER_KEY_IP_PROTO: + val := new(nl.IPProto) + *val = nl.IPProto(datum.Value[0]) + filter.IPProto = val + case nl.TCA_FLOWER_KEY_TCP_SRC, nl.TCA_FLOWER_KEY_UDP_SRC, nl.TCA_FLOWER_KEY_SCTP_SRC: + filter.SrcPort = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_TCP_DST, nl.TCA_FLOWER_KEY_UDP_DST, nl.TCA_FLOWER_KEY_SCTP_DST: + filter.DestPort = ntohs(datum.Value) + case nl.TCA_FLOWER_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return err + } + filter.Actions, err = parseActions(tables) + if err != nil { + return err + } + case nl.TCA_FLOWER_FLAGS: + attr := nl.DeserializeUint32Bitfield(datum.Value) + skipSw := attr.Value & nl.TCA_CLS_FLAGS_SKIP_HW + skipHw := attr.Value & nl.TCA_CLS_FLAGS_SKIP_SW + if skipSw != 0 { + filter.SkipSw = true + } + if skipHw != 0 { + filter.SkipHw = true + } + } + } + return nil } // FilterDel will delete a filter from the system. @@ -129,19 +242,7 @@ func FilterDel(filter Filter) error { // FilterDel will delete a filter from the system. // Equivalent to: `tc filter del $filter` func (h *Handle) FilterDel(filter Filter) error { - req := h.newNetlinkRequest(unix.RTM_DELTFILTER, unix.NLM_F_ACK) - base := filter.Attrs() - msg := &nl.TcMsg{ - Family: nl.FAMILY_ALL, - Ifindex: int32(base.LinkIndex), - Handle: base.Handle, - Parent: base.Parent, - Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), - } - req.AddData(msg) - - _, err := req.Execute(unix.NETLINK_ROUTE, 0) - return err + return h.filterModify(filter, unix.RTM_DELTFILTER, 0) } // FilterAdd will add a filter to the system. @@ -153,7 +254,7 @@ func FilterAdd(filter Filter) error { // FilterAdd will add a filter to the system. // Equivalent to: `tc filter add $filter` func (h *Handle) FilterAdd(filter Filter) error { - return h.filterModify(filter, unix.NLM_F_CREATE|unix.NLM_F_EXCL) + return h.filterModify(filter, unix.RTM_NEWTFILTER, unix.NLM_F_CREATE|unix.NLM_F_EXCL) } // FilterReplace will replace a filter. @@ -165,12 +266,11 @@ func FilterReplace(filter Filter) error { // FilterReplace will replace a filter. // Equivalent to: `tc filter replace $filter` func (h *Handle) FilterReplace(filter Filter) error { - return h.filterModify(filter, unix.NLM_F_CREATE) + return h.filterModify(filter, unix.RTM_NEWTFILTER, unix.NLM_F_CREATE) } -func (h *Handle) filterModify(filter Filter, flags int) error { - native = nl.NativeEndian() - req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, flags|unix.NLM_F_ACK) +func (h *Handle) filterModify(filter Filter, proto, flags int) error { + req := h.newNetlinkRequest(proto, flags|unix.NLM_F_ACK) base := filter.Attrs() msg := &nl.TcMsg{ Family: nl.FAMILY_ALL, @@ -180,6 +280,9 @@ func (h *Handle) filterModify(filter Filter, flags int) error { Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), } req.AddData(msg) + if filter.Attrs().Chain != nil { + req.AddData(nl.NewRtAttr(nl.TCA_CHAIN, nl.Uint32Attr(*filter.Attrs().Chain))) + } req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type()))) options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) @@ -226,6 +329,15 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if filter.Hash != 0 { options.AddRtAttr(nl.TCA_U32_HASH, nl.Uint32Attr(filter.Hash)) } + if filter.Link != 0 { + options.AddRtAttr(nl.TCA_U32_LINK, nl.Uint32Attr(filter.Link)) + } + if filter.Police != nil { + police := options.AddRtAttr(nl.TCA_U32_POLICE, nil) + if err := encodePolice(police, filter.Police); err != nil { + return err + } + } actionsAttr := options.AddRtAttr(nl.TCA_U32_ACT, nil) // backwards compatibility if filter.RedirIndex != 0 { @@ -234,7 +346,7 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if err := EncodeActions(actionsAttr, filter.Actions); err != nil { return err } - case *Fw: + case *FwFilter: if filter.Mask != 0 { b := make([]byte, 4) native.PutUint32(b, filter.Mask) @@ -243,17 +355,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if filter.InDev != "" { options.AddRtAttr(nl.TCA_FW_INDEV, nl.ZeroTerminated(filter.InDev)) } - if (filter.Police != nl.TcPolice{}) { - + if filter.Police != nil { police := options.AddRtAttr(nl.TCA_FW_POLICE, nil) - police.AddRtAttr(nl.TCA_POLICE_TBF, filter.Police.Serialize()) - if (filter.Police.Rate != nl.TcRateSpec{}) { - payload := SerializeRtab(filter.Rtab) - police.AddRtAttr(nl.TCA_POLICE_RATE, payload) - } - if (filter.Police.PeakRate != nl.TcRateSpec{}) { - payload := SerializeRtab(filter.Ptab) - police.AddRtAttr(nl.TCA_POLICE_PEAKRATE, payload) + if err := encodePolice(police, filter.Police); err != nil { + return err } } if filter.ClassId != 0 { @@ -261,6 +366,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error { native.PutUint32(b, filter.ClassId) options.AddRtAttr(nl.TCA_FW_CLASSID, b) } + actionsAttr := options.AddRtAttr(nl.TCA_FW_ACT, nil) + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } case *BpfFilter: var bpfFlags uint32 if filter.ClassId != 0 { @@ -284,8 +393,11 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if filter.ClassId != 0 { options.AddRtAttr(nl.TCA_MATCHALL_CLASSID, nl.Uint32Attr(filter.ClassId)) } + case *Flower: + if err := filter.encode(options); err != nil { + return err + } } - req.AddData(options) _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err @@ -347,11 +459,13 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { case "u32": filter = &U32{} case "fw": - filter = &Fw{} + filter = &FwFilter{} case "bpf": filter = &BpfFilter{} case "matchall": filter = &MatchAll{} + case "flower": + filter = &Flower{} default: filter = &GenericFilter{FilterType: filterType} } @@ -381,9 +495,18 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { if err != nil { return nil, err } + case "flower": + detailed, err = parseFlowerData(filter, data) + if err != nil { + return nil, err + } default: detailed = true } + case nl.TCA_CHAIN: + val := new(uint32) + *val = native.Uint32(attr.Value) + base.Chain = val } } // only return the detailed version of the filter @@ -412,6 +535,61 @@ func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) { attrs.Bindcnt = int(tcgen.Bindcnt) } +func toTimeStamp(tcf *nl.Tcf) *ActionTimestamp { + return &ActionTimestamp{ + Installed: tcf.Install, + LastUsed: tcf.LastUse, + Expires: tcf.Expires, + FirstUsed: tcf.FirstUse} +} + +func encodePolice(attr *nl.RtAttr, action *PoliceAction) error { + var rtab [256]uint32 + var ptab [256]uint32 + police := nl.TcPolice{} + police.Index = uint32(action.Attrs().Index) + police.Bindcnt = int32(action.Attrs().Bindcnt) + police.Capab = uint32(action.Attrs().Capab) + police.Refcnt = int32(action.Attrs().Refcnt) + police.Rate.Rate = action.Rate + police.PeakRate.Rate = action.PeakRate + police.Action = int32(action.ExceedAction) + + if police.Rate.Rate != 0 { + police.Rate.Mpu = action.Mpu + police.Rate.Overhead = action.Overhead + if CalcRtable(&police.Rate, rtab[:], action.RCellLog, action.Mtu, action.LinkLayer) < 0 { + return errors.New("TBF: failed to calculate rate table") + } + police.Burst = Xmittime(uint64(police.Rate.Rate), action.Burst) + } + + police.Mtu = action.Mtu + if police.PeakRate.Rate != 0 { + police.PeakRate.Mpu = action.Mpu + police.PeakRate.Overhead = action.Overhead + if CalcRtable(&police.PeakRate, ptab[:], action.PCellLog, action.Mtu, action.LinkLayer) < 0 { + return errors.New("POLICE: failed to calculate peak rate table") + } + } + + attr.AddRtAttr(nl.TCA_POLICE_TBF, police.Serialize()) + if police.Rate.Rate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_RATE, SerializeRtab(rtab)) + } + if police.PeakRate.Rate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_PEAKRATE, SerializeRtab(ptab)) + } + if action.AvRate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_AVRATE, nl.Uint32Attr(action.AvRate)) + } + if action.NotExceedAction != 0 { + attr.AddRtAttr(nl.TCA_POLICE_RESULT, nl.Uint32Attr(uint32(action.NotExceedAction))) + } + + return nil +} + func EncodeActions(attr *nl.RtAttr, actions []Action) error { tabIndex := int(nl.TCA_ACT_TAB) @@ -419,6 +597,14 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { switch action := action.(type) { default: return fmt.Errorf("unknown action type %s", action.Type()) + case *PoliceAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("police")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + if err := encodePolice(aopts, action); err != nil { + return err + } case *MirredAction: table := attr.AddRtAttr(tabIndex, nil) tabIndex++ @@ -456,6 +642,9 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { } else { return fmt.Errorf("invalid dst addr %s for tunnel_key action", action.DstAddr) } + if action.DestPort != 0 { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_DST_PORT, htons(action.DestPort)) + } } case *SkbEditAction: table := attr.AddRtAttr(tabIndex, nil) @@ -477,6 +666,9 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { if action.Mark != nil { aopts.AddRtAttr(nl.TCA_SKBEDIT_MARK, nl.Uint32Attr(*action.Mark)) } + if action.Mask != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_MASK, nl.Uint32Attr(*action.Mask)) + } case *ConnmarkAction: table := attr.AddRtAttr(tabIndex, nil) tabIndex++ @@ -487,6 +679,16 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { } toTcGen(action.Attrs(), &connmark.TcGen) aopts.AddRtAttr(nl.TCA_CONNMARK_PARMS, connmark.Serialize()) + case *CsumAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("csum")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + csum := nl.TcCsum{ + UpdateFlags: uint32(action.UpdateFlags), + } + toTcGen(action.Attrs(), &csum.TcGen) + aopts.AddRtAttr(nl.TCA_CSUM_PARMS, csum.Serialize()) case *BpfAction: table := attr.AddRtAttr(tabIndex, nil) tabIndex++ @@ -505,16 +707,64 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { gen := nl.TcGen{} toTcGen(action.Attrs(), &gen) aopts.AddRtAttr(nl.TCA_GACT_PARMS, gen.Serialize()) + case *PeditAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + pedit := nl.TcPedit{} + if action.SrcMacAddr != nil { + pedit.SetEthSrc(action.SrcMacAddr) + } + if action.DstMacAddr != nil { + pedit.SetEthDst(action.DstMacAddr) + } + if action.SrcIP != nil { + pedit.SetSrcIP(action.SrcIP) + } + if action.DstIP != nil { + pedit.SetDstIP(action.DstIP) + } + if action.SrcPort != 0 { + pedit.SetSrcPort(action.SrcPort, action.Proto) + } + if action.DstPort != 0 { + pedit.SetDstPort(action.DstPort, action.Proto) + } + pedit.Encode(table) } } return nil } +func parsePolice(data syscall.NetlinkRouteAttr, police *PoliceAction) { + switch data.Attr.Type { + case nl.TCA_POLICE_RESULT: + police.NotExceedAction = TcPolAct(native.Uint32(data.Value[0:4])) + case nl.TCA_POLICE_AVRATE: + police.AvRate = native.Uint32(data.Value[0:4]) + case nl.TCA_POLICE_TBF: + p := *nl.DeserializeTcPolice(data.Value) + police.ActionAttrs = ActionAttrs{} + police.Attrs().Index = int(p.Index) + police.Attrs().Bindcnt = int(p.Bindcnt) + police.Attrs().Capab = int(p.Capab) + police.Attrs().Refcnt = int(p.Refcnt) + police.ExceedAction = TcPolAct(p.Action) + police.Rate = p.Rate.Rate + police.PeakRate = p.PeakRate.Rate + police.Burst = Xmitsize(uint64(p.Rate.Rate), p.Burst) + police.Mtu = p.Mtu + police.LinkLayer = int(p.Rate.Linklayer) & nl.TC_LINKLAYER_MASK + police.Overhead = p.Rate.Overhead + } +} + func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { var actions []Action for _, table := range tables { var action Action var actionType string + var actionnStatistic *ActionStatistic + var actionTimestamp *ActionTimestamp aattrs, err := nl.ParseRouteAttr(table.Value) if err != nil { return nil, err @@ -532,12 +782,18 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action = &BpfAction{} case "connmark": action = &ConnmarkAction{} + case "csum": + action = &CsumAction{} case "gact": action = &GenericAction{} case "tunnel_key": action = &TunnelKeyAction{} case "skbedit": action = &SkbEditAction{} + case "police": + action = &PoliceAction{} + case "pedit": + action = &PeditAction{} default: break nextattr } @@ -556,7 +812,11 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { toAttrs(&mirred.TcGen, action.Attrs()) action.(*MirredAction).Ifindex = int(mirred.Ifindex) action.(*MirredAction).MirredAction = MirredAct(mirred.Eaction) + case nl.TCA_MIRRED_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } + case "tunnel_key": switch adatum.Attr.Type { case nl.TCA_TUNNEL_KEY_PARMS: @@ -566,12 +826,15 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action.(*TunnelKeyAction).Action = TunnelKeyAct(tun.Action) case nl.TCA_TUNNEL_KEY_ENC_KEY_ID: action.(*TunnelKeyAction).KeyID = networkOrder.Uint32(adatum.Value[0:4]) - case nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC: - case nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC: - action.(*TunnelKeyAction).SrcAddr = net.IP(adatum.Value[:]) - case nl.TCA_TUNNEL_KEY_ENC_IPV6_DST: - case nl.TCA_TUNNEL_KEY_ENC_IPV4_DST: - action.(*TunnelKeyAction).DstAddr = net.IP(adatum.Value[:]) + case nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC, nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC: + action.(*TunnelKeyAction).SrcAddr = adatum.Value[:] + case nl.TCA_TUNNEL_KEY_ENC_IPV6_DST, nl.TCA_TUNNEL_KEY_ENC_IPV4_DST: + action.(*TunnelKeyAction).DstAddr = adatum.Value[:] + case nl.TCA_TUNNEL_KEY_ENC_DST_PORT: + action.(*TunnelKeyAction).DestPort = ntohs(adatum.Value) + case nl.TCA_TUNNEL_KEY_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "skbedit": switch adatum.Attr.Type { @@ -582,6 +845,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { case nl.TCA_SKBEDIT_MARK: mark := native.Uint32(adatum.Value[0:4]) action.(*SkbEditAction).Mark = &mark + case nl.TCA_SKBEDIT_MASK: + mask := native.Uint32(adatum.Value[0:4]) + action.(*SkbEditAction).Mask = &mask case nl.TCA_SKBEDIT_PRIORITY: priority := native.Uint32(adatum.Value[0:4]) action.(*SkbEditAction).Priority = &priority @@ -591,6 +857,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { case nl.TCA_SKBEDIT_QUEUE_MAPPING: mapping := native.Uint16(adatum.Value[0:2]) action.(*SkbEditAction).QueueMapping = &mapping + case nl.TCA_SKBEDIT_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "bpf": switch adatum.Attr.Type { @@ -601,6 +870,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action.(*BpfAction).Fd = int(native.Uint32(adatum.Value[0:4])) case nl.TCA_ACT_BPF_NAME: action.(*BpfAction).Name = string(adatum.Value[:len(adatum.Value)-1]) + case nl.TCA_ACT_BPF_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "connmark": switch adatum.Attr.Type { @@ -609,24 +881,53 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action.(*ConnmarkAction).ActionAttrs = ActionAttrs{} toAttrs(&connmark.TcGen, action.Attrs()) action.(*ConnmarkAction).Zone = connmark.Zone + case nl.TCA_CONNMARK_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) + } + case "csum": + switch adatum.Attr.Type { + case nl.TCA_CSUM_PARMS: + csum := *nl.DeserializeTcCsum(adatum.Value) + action.(*CsumAction).ActionAttrs = ActionAttrs{} + toAttrs(&csum.TcGen, action.Attrs()) + action.(*CsumAction).UpdateFlags = CsumUpdateFlags(csum.UpdateFlags) + case nl.TCA_CSUM_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } case "gact": switch adatum.Attr.Type { case nl.TCA_GACT_PARMS: gen := *nl.DeserializeTcGen(adatum.Value) toAttrs(&gen, action.Attrs()) + if action.Attrs().Action.String() == "goto" { + action.(*GenericAction).Chain = TC_ACT_EXT_VAL_MASK & gen.Action + } + case nl.TCA_GACT_TM: + tcTs := nl.DeserializeTcf(adatum.Value) + actionTimestamp = toTimeStamp(tcTs) } + case "police": + parsePolice(adatum, action.(*PoliceAction)) } } + case nl.TCA_ACT_STATS: + s, err := parseTcStats2(aattr.Value) + if err != nil { + return nil, err + } + actionnStatistic = (*ActionStatistic)(s) } } + action.Attrs().Statistics = actionnStatistic + action.Attrs().Timestamp = actionTimestamp actions = append(actions, action) } return actions, nil } func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - native = nl.NativeEndian() u32 := filter.(*U32) detailed := false for _, datum := range data { @@ -658,20 +959,28 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) u32.RedirIndex = int(action.Ifindex) } } + case nl.TCA_U32_POLICE: + var police PoliceAction + adata, _ := nl.ParseRouteAttr(datum.Value) + for _, aattr := range adata { + parsePolice(aattr, &police) + } + u32.Police = &police case nl.TCA_U32_CLASSID: u32.ClassId = native.Uint32(datum.Value) case nl.TCA_U32_DIVISOR: u32.Divisor = native.Uint32(datum.Value) case nl.TCA_U32_HASH: u32.Hash = native.Uint32(datum.Value) + case nl.TCA_U32_LINK: + u32.Link = native.Uint32(datum.Value) } } return detailed, nil } func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - native = nl.NativeEndian() - fw := filter.(*Fw) + fw := filter.(*FwFilter) detailed := true for _, datum := range data { switch datum.Attr.Type { @@ -682,16 +991,20 @@ func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { case nl.TCA_FW_INDEV: fw.InDev = string(datum.Value[:len(datum.Value)-1]) case nl.TCA_FW_POLICE: + var police PoliceAction adata, _ := nl.ParseRouteAttr(datum.Value) for _, aattr := range adata { - switch aattr.Attr.Type { - case nl.TCA_POLICE_TBF: - fw.Police = *nl.DeserializeTcPolice(aattr.Value) - case nl.TCA_POLICE_RATE: - fw.Rtab = DeserializeRtab(aattr.Value) - case nl.TCA_POLICE_PEAKRATE: - fw.Ptab = DeserializeRtab(aattr.Value) - } + parsePolice(aattr, &police) + } + fw.Police = &police + case nl.TCA_FW_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return detailed, err + } + fw.Actions, err = parseActions(tables) + if err != nil { + return detailed, err } } } @@ -699,7 +1012,6 @@ func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { } func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - native = nl.NativeEndian() bpf := filter.(*BpfFilter) detailed := true for _, datum := range data { @@ -718,14 +1030,13 @@ func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) case nl.TCA_BPF_ID: bpf.Id = int(native.Uint32(datum.Value[0:4])) case nl.TCA_BPF_TAG: - bpf.Tag = hex.EncodeToString(datum.Value[:len(datum.Value)-1]) + bpf.Tag = hex.EncodeToString(datum.Value) } } return detailed, nil } func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - native = nl.NativeEndian() matchall := filter.(*MatchAll) detailed := true for _, datum := range data { @@ -746,6 +1057,10 @@ func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, er return detailed, nil } +func parseFlowerData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + return true, filter.(*Flower).decode(data) +} + func AlignToAtm(size uint) uint { var linksize, cells int cells = int(size / nl.ATM_CELL_PAYLOAD) @@ -783,7 +1098,7 @@ func CalcRtable(rate *nl.TcRateSpec, rtab []uint32, cellLog int, mtu uint32, lin } for i := 0; i < 256; i++ { sz = AdjustSize(uint((i+1)<= nl.IPSET_ERR_PRIVATE { + err = nl.IPSetError(uintptr(errno)) + } + } + return +} + +func ipsetUnserialize(msgs [][]byte) (result IPSetResult) { + for _, msg := range msgs { + result.unserialize(msg) + } + return result +} + +func (result *IPSetResult) unserialize(msg []byte) { + result.Nfgenmsg = nl.DeserializeNfgenmsg(msg) + + for attr := range nl.ParseAttributes(msg[4:]) { + switch attr.Type { + case nl.IPSET_ATTR_PROTOCOL: + result.Protocol = attr.Value[0] + case nl.IPSET_ATTR_SETNAME: + result.SetName = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_COMMENT: + result.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_TYPENAME: + result.TypeName = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_REVISION: + result.Revision = attr.Value[0] + case nl.IPSET_ATTR_FAMILY: + result.Family = attr.Value[0] + case nl.IPSET_ATTR_FLAGS: + result.Flags = attr.Value[0] + case nl.IPSET_ATTR_DATA | nl.NLA_F_NESTED: + result.parseAttrData(attr.Value) + case nl.IPSET_ATTR_ADT | nl.NLA_F_NESTED: + result.parseAttrADT(attr.Value) + case nl.IPSET_ATTR_PROTOCOL_MIN: + result.ProtocolMinVersion = attr.Value[0] + case nl.IPSET_ATTR_MARKMASK: + result.MarkMask = attr.Uint32() + default: + log.Printf("unknown ipset attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func (result *IPSetResult) parseAttrData(data []byte) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_HASHSIZE | nl.NLA_F_NET_BYTEORDER: + result.HashSize = attr.Uint32() + case nl.IPSET_ATTR_MAXELEM | nl.NLA_F_NET_BYTEORDER: + result.MaxElements = attr.Uint32() + case nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + result.Timeout = &val + case nl.IPSET_ATTR_ELEMENTS | nl.NLA_F_NET_BYTEORDER: + result.NumEntries = attr.Uint32() + case nl.IPSET_ATTR_REFERENCES | nl.NLA_F_NET_BYTEORDER: + result.References = attr.Uint32() + case nl.IPSET_ATTR_MEMSIZE | nl.NLA_F_NET_BYTEORDER: + result.SizeInMemory = attr.Uint32() + case nl.IPSET_ATTR_CADT_FLAGS | nl.NLA_F_NET_BYTEORDER: + result.CadtFlags = attr.Uint32() + case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED: + for nested := range nl.ParseAttributes(attr.Value) { + switch nested.Type { + case nl.IPSET_ATTR_IP | nl.NLA_F_NET_BYTEORDER: + result.Entries = append(result.Entries, IPSetEntry{IP: nested.Value}) + case nl.IPSET_ATTR_IP: + result.IPFrom = nested.Value + default: + log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK) + } + } + case nl.IPSET_ATTR_IP_TO | nl.NLA_F_NESTED: + for nested := range nl.ParseAttributes(attr.Value) { + switch nested.Type { + case nl.IPSET_ATTR_IP: + result.IPTo = nested.Value + default: + log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK) + } + } + case nl.IPSET_ATTR_PORT_FROM | nl.NLA_F_NET_BYTEORDER: + result.PortFrom = networkOrder.Uint16(attr.Value) + case nl.IPSET_ATTR_PORT_TO | nl.NLA_F_NET_BYTEORDER: + result.PortTo = networkOrder.Uint16(attr.Value) + case nl.IPSET_ATTR_CADT_LINENO | nl.NLA_F_NET_BYTEORDER: + result.LineNo = attr.Uint32() + case nl.IPSET_ATTR_COMMENT: + result.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_MARKMASK: + result.MarkMask = attr.Uint32() + default: + log.Printf("unknown ipset data attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func (result *IPSetResult) parseAttrADT(data []byte) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_DATA | nl.NLA_F_NESTED: + result.Entries = append(result.Entries, parseIPSetEntry(attr.Value)) + default: + log.Printf("unknown ADT attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func parseIPSetEntry(data []byte) (entry IPSetEntry) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + entry.Timeout = &val + case nl.IPSET_ATTR_BYTES | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint64() + entry.Bytes = &val + case nl.IPSET_ATTR_PACKETS | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint64() + entry.Packets = &val + case nl.IPSET_ATTR_ETHER: + entry.MAC = net.HardwareAddr(attr.Value) + case nl.IPSET_ATTR_IP: + entry.IP = net.IP(attr.Value) + case nl.IPSET_ATTR_COMMENT: + entry.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED: + for attr := range nl.ParseAttributes(attr.Value) { + switch attr.Type { + case nl.IPSET_ATTR_IPADDR_IPV4, nl.IPSET_ATTR_IPADDR_IPV6: + entry.IP = net.IP(attr.Value) + default: + log.Printf("unknown nested ADT attribute from kernel: %+v", attr) + } + } + case nl.IPSET_ATTR_IP2 | nl.NLA_F_NESTED: + for attr := range nl.ParseAttributes(attr.Value) { + switch attr.Type { + case nl.IPSET_ATTR_IPADDR_IPV4, nl.IPSET_ATTR_IPADDR_IPV6: + entry.IP2 = net.IP(attr.Value) + default: + log.Printf("unknown nested ADT attribute from kernel: %+v", attr) + } + } + case nl.IPSET_ATTR_CIDR: + entry.CIDR = attr.Value[0] + case nl.IPSET_ATTR_CIDR2: + entry.CIDR2 = attr.Value[0] + case nl.IPSET_ATTR_PORT | nl.NLA_F_NET_BYTEORDER: + val := networkOrder.Uint16(attr.Value) + entry.Port = &val + case nl.IPSET_ATTR_PROTO: + val := attr.Value[0] + entry.Protocol = &val + case nl.IPSET_ATTR_IFACE: + entry.IFace = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + entry.Mark = &val + default: + log.Printf("unknown ADT attribute from kernel: %+v", attr) + } + } + return +} diff --git a/vendor/github.com/vishvananda/netlink/link.go b/vendor/github.com/vishvananda/netlink/link.go index 886d88d1b2f..f820cdb678d 100644 --- a/vendor/github.com/vishvananda/netlink/link.go +++ b/vendor/github.com/vishvananda/netlink/link.go @@ -22,31 +22,41 @@ type ( // LinkAttrs represents data shared by most link types type LinkAttrs struct { - Index int - MTU int - TxQLen int // Transmit Queue Length - Name string - HardwareAddr net.HardwareAddr - Flags net.Flags - RawFlags uint32 - ParentIndex int // index of the parent link device - MasterIndex int // must be the index of a bridge - Namespace interface{} // nil | NsPid | NsFd - Alias string - Statistics *LinkStatistics - Promisc int - Xdp *LinkXdp - EncapType string - Protinfo *Protinfo - OperState LinkOperState - NetNsID int - NumTxQueues int - NumRxQueues int - GSOMaxSize uint32 - GSOMaxSegs uint32 - Vfs []VfInfo // virtual functions available on link - Group uint32 - Slave LinkSlave + Index int + MTU int + TxQLen int // Transmit Queue Length + Name string + HardwareAddr net.HardwareAddr + Flags net.Flags + RawFlags uint32 + ParentIndex int // index of the parent link device + MasterIndex int // must be the index of a bridge + Namespace interface{} // nil | NsPid | NsFd + Alias string + AltNames []string + Statistics *LinkStatistics + Promisc int + Allmulti int + Multi int + Xdp *LinkXdp + EncapType string + Protinfo *Protinfo + OperState LinkOperState + PhysSwitchID int + NetNsID int + NumTxQueues int + NumRxQueues int + TSOMaxSegs uint32 + TSOMaxSize uint32 + GSOMaxSegs uint32 + GSOMaxSize uint32 + GROMaxSize uint32 + GSOIPv4MaxSize uint32 + GROIPv4MaxSize uint32 + Vfs []VfInfo // virtual functions available on link + Group uint32 + PermHWAddr net.HardwareAddr + Slave LinkSlave } // LinkSlave represents a slave device. @@ -60,11 +70,23 @@ type VfInfo struct { Mac net.HardwareAddr Vlan int Qos int + VlanProto int TxRate int // IFLA_VF_TX_RATE Max TxRate Spoofchk bool LinkState uint32 MaxTxRate uint32 // IFLA_VF_RATE Max TxRate MinTxRate uint32 // IFLA_VF_RATE Min TxRate + RxPackets uint64 + TxPackets uint64 + RxBytes uint64 + TxBytes uint64 + Multicast uint64 + Broadcast uint64 + RxDropped uint64 + TxDropped uint64 + + RssQuery uint32 + Trust uint32 } // LinkOperState represents the values of the IFLA_OPERSTATE link @@ -103,7 +125,8 @@ func (s LinkOperState) String() string { // NewLinkAttrs returns LinkAttrs structure filled with default values func NewLinkAttrs() LinkAttrs { return LinkAttrs{ - TxQLen: -1, + NetNsID: -1, + TxQLen: -1, } } @@ -196,10 +219,11 @@ type LinkStatistics64 struct { } type LinkXdp struct { - Fd int - Attached bool - Flags uint32 - ProgId uint32 + Fd int + Attached bool + AttachMode uint32 + Flags uint32 + ProgId uint32 } // Device links cannot be created via netlink. These links @@ -246,8 +270,11 @@ func (ifb *Ifb) Type() string { type Bridge struct { LinkAttrs MulticastSnooping *bool + AgeingTime *uint32 HelloTime *uint32 VlanFiltering *bool + VlanDefaultPVID *uint16 + GroupFwdMask *uint16 } func (bridge *Bridge) Attrs() *LinkAttrs { @@ -291,6 +318,9 @@ type Macvlan struct { // MACAddrs is only populated for Macvlan SOURCE links MACAddrs []net.HardwareAddr + + BCQueueLen uint32 + UsedBCQueueLen uint32 } func (macvlan *Macvlan) Attrs() *LinkAttrs { @@ -333,11 +363,52 @@ func (tuntap *Tuntap) Type() string { return "tuntap" } +type NetkitMode uint32 + +const ( + NETKIT_MODE_L2 NetkitMode = iota + NETKIT_MODE_L3 +) + +type NetkitPolicy int + +const ( + NETKIT_POLICY_FORWARD NetkitPolicy = 0 + NETKIT_POLICY_BLACKHOLE NetkitPolicy = 2 +) + +func (n *Netkit) IsPrimary() bool { + return n.isPrimary +} + +// SetPeerAttrs will not take effect if trying to modify an existing netkit device +func (n *Netkit) SetPeerAttrs(Attrs *LinkAttrs) { + n.peerLinkAttrs = *Attrs +} + +type Netkit struct { + LinkAttrs + Mode NetkitMode + Policy NetkitPolicy + PeerPolicy NetkitPolicy + isPrimary bool + peerLinkAttrs LinkAttrs +} + +func (n *Netkit) Attrs() *LinkAttrs { + return &n.LinkAttrs +} + +func (n *Netkit) Type() string { + return "netkit" +} + // Veth devices must specify PeerName on create type Veth struct { LinkAttrs PeerName string // veth on create only PeerHardwareAddr net.HardwareAddr + PeerNamespace interface{} } func (veth *Veth) Attrs() *LinkAttrs { @@ -348,6 +419,19 @@ func (veth *Veth) Type() string { return "veth" } +// Wireguard represent links of type "wireguard", see https://www.wireguard.com/ +type Wireguard struct { + LinkAttrs +} + +func (wg *Wireguard) Attrs() *LinkAttrs { + return &wg.LinkAttrs +} + +func (wg *Wireguard) Type() string { + return "wireguard" +} + // GenericLink links represent types that are not currently understood // by this netlink library. type GenericLink struct { @@ -428,6 +512,19 @@ func (ipvlan *IPVlan) Type() string { return "ipvlan" } +// IPVtap - IPVtap is a virtual interfaces based on ipvlan +type IPVtap struct { + IPVlan +} + +func (ipvtap *IPVtap) Attrs() *LinkAttrs { + return &ipvtap.LinkAttrs +} + +func (ipvtap IPVtap) Type() string { + return "ipvtap" +} + // VlanProtocol type type VlanProtocol int @@ -527,6 +624,27 @@ const ( BOND_ARP_VALIDATE_ALL ) +var bondArpValidateToString = map[BondArpValidate]string{ + BOND_ARP_VALIDATE_NONE: "none", + BOND_ARP_VALIDATE_ACTIVE: "active", + BOND_ARP_VALIDATE_BACKUP: "backup", + BOND_ARP_VALIDATE_ALL: "none", +} +var StringToBondArpValidateMap = map[string]BondArpValidate{ + "none": BOND_ARP_VALIDATE_NONE, + "active": BOND_ARP_VALIDATE_ACTIVE, + "backup": BOND_ARP_VALIDATE_BACKUP, + "all": BOND_ARP_VALIDATE_ALL, +} + +func (b BondArpValidate) String() string { + s, ok := bondArpValidateToString[b] + if !ok { + return fmt.Sprintf("BondArpValidate(%d)", b) + } + return s +} + // BondPrimaryReselect type type BondPrimaryReselect int @@ -537,6 +655,25 @@ const ( BOND_PRIMARY_RESELECT_FAILURE ) +var bondPrimaryReselectToString = map[BondPrimaryReselect]string{ + BOND_PRIMARY_RESELECT_ALWAYS: "always", + BOND_PRIMARY_RESELECT_BETTER: "better", + BOND_PRIMARY_RESELECT_FAILURE: "failure", +} +var StringToBondPrimaryReselectMap = map[string]BondPrimaryReselect{ + "always": BOND_PRIMARY_RESELECT_ALWAYS, + "better": BOND_PRIMARY_RESELECT_BETTER, + "failure": BOND_PRIMARY_RESELECT_FAILURE, +} + +func (b BondPrimaryReselect) String() string { + s, ok := bondPrimaryReselectToString[b] + if !ok { + return fmt.Sprintf("BondPrimaryReselect(%d)", b) + } + return s +} + // BondArpAllTargets type type BondArpAllTargets int @@ -546,6 +683,23 @@ const ( BOND_ARP_ALL_TARGETS_ALL ) +var bondArpAllTargetsToString = map[BondArpAllTargets]string{ + BOND_ARP_ALL_TARGETS_ANY: "any", + BOND_ARP_ALL_TARGETS_ALL: "all", +} +var StringToBondArpAllTargetsMap = map[string]BondArpAllTargets{ + "any": BOND_ARP_ALL_TARGETS_ANY, + "all": BOND_ARP_ALL_TARGETS_ALL, +} + +func (b BondArpAllTargets) String() string { + s, ok := bondArpAllTargetsToString[b] + if !ok { + return fmt.Sprintf("BondArpAllTargets(%d)", b) + } + return s +} + // BondFailOverMac type type BondFailOverMac int @@ -556,6 +710,25 @@ const ( BOND_FAIL_OVER_MAC_FOLLOW ) +var bondFailOverMacToString = map[BondFailOverMac]string{ + BOND_FAIL_OVER_MAC_NONE: "none", + BOND_FAIL_OVER_MAC_ACTIVE: "active", + BOND_FAIL_OVER_MAC_FOLLOW: "follow", +} +var StringToBondFailOverMacMap = map[string]BondFailOverMac{ + "none": BOND_FAIL_OVER_MAC_NONE, + "active": BOND_FAIL_OVER_MAC_ACTIVE, + "follow": BOND_FAIL_OVER_MAC_FOLLOW, +} + +func (b BondFailOverMac) String() string { + s, ok := bondFailOverMacToString[b] + if !ok { + return fmt.Sprintf("BondFailOverMac(%d)", b) + } + return s +} + // BondXmitHashPolicy type type BondXmitHashPolicy int @@ -583,6 +756,7 @@ const ( BOND_XMIT_HASH_POLICY_LAYER2_3 BOND_XMIT_HASH_POLICY_ENCAP2_3 BOND_XMIT_HASH_POLICY_ENCAP3_4 + BOND_XMIT_HASH_POLICY_VLAN_SRCMAC BOND_XMIT_HASH_POLICY_UNKNOWN ) @@ -592,6 +766,7 @@ var bondXmitHashPolicyToString = map[BondXmitHashPolicy]string{ BOND_XMIT_HASH_POLICY_LAYER2_3: "layer2+3", BOND_XMIT_HASH_POLICY_ENCAP2_3: "encap2+3", BOND_XMIT_HASH_POLICY_ENCAP3_4: "encap3+4", + BOND_XMIT_HASH_POLICY_VLAN_SRCMAC: "vlan+srcmac", } var StringToBondXmitHashPolicyMap = map[string]BondXmitHashPolicy{ "layer2": BOND_XMIT_HASH_POLICY_LAYER2, @@ -599,6 +774,7 @@ var StringToBondXmitHashPolicyMap = map[string]BondXmitHashPolicy{ "layer2+3": BOND_XMIT_HASH_POLICY_LAYER2_3, "encap2+3": BOND_XMIT_HASH_POLICY_ENCAP2_3, "encap3+4": BOND_XMIT_HASH_POLICY_ENCAP3_4, + "vlan+srcmac": BOND_XMIT_HASH_POLICY_VLAN_SRCMAC, } // BondLacpRate type @@ -647,6 +823,25 @@ const ( BOND_AD_SELECT_COUNT ) +var bondAdSelectToString = map[BondAdSelect]string{ + BOND_AD_SELECT_STABLE: "stable", + BOND_AD_SELECT_BANDWIDTH: "bandwidth", + BOND_AD_SELECT_COUNT: "count", +} +var StringToBondAdSelectMap = map[string]BondAdSelect{ + "stable": BOND_AD_SELECT_STABLE, + "bandwidth": BOND_AD_SELECT_BANDWIDTH, + "count": BOND_AD_SELECT_COUNT, +} + +func (b BondAdSelect) String() string { + s, ok := bondAdSelectToString[b] + if !ok { + return fmt.Sprintf("BondAdSelect(%d)", b) + } + return s +} + // BondAdInfo represents ad info for bond type BondAdInfo struct { AggregatorId int @@ -678,7 +873,7 @@ type Bond struct { AllSlavesActive int MinLinks int LpInterval int - PackersPerSlave int + PacketsPerSlave int LacpRate BondLacpRate AdSelect BondAdSelect // looking at iproute tool AdInfo can only be retrived. It can't be set. @@ -711,7 +906,7 @@ func NewLinkBond(atr LinkAttrs) *Bond { AllSlavesActive: -1, MinLinks: -1, LpInterval: -1, - PackersPerSlave: -1, + PacketsPerSlave: -1, LacpRate: -1, AdSelect: -1, AdActorSysPrio: -1, @@ -761,8 +956,10 @@ func (bond *Bond) Type() string { type BondSlaveState uint8 const ( - BondStateActive = iota // Link is active. - BondStateBackup // Link is backup. + //BondStateActive Link is active. + BondStateActive BondSlaveState = iota + //BondStateBackup Link is backup. + BondStateBackup ) func (s BondSlaveState) String() string { @@ -776,15 +973,19 @@ func (s BondSlaveState) String() string { } } -// BondSlaveState represents the values of the IFLA_BOND_SLAVE_MII_STATUS bond slave +// BondSlaveMiiStatus represents the values of the IFLA_BOND_SLAVE_MII_STATUS bond slave // attribute, which contains the status of MII link monitoring type BondSlaveMiiStatus uint8 const ( - BondLinkUp = iota // link is up and running. - BondLinkFail // link has just gone down. - BondLinkDown // link has been down for too long time. - BondLinkBack // link is going back. + //BondLinkUp link is up and running. + BondLinkUp BondSlaveMiiStatus = iota + //BondLinkFail link has just gone down. + BondLinkFail + //BondLinkDown link has been down for too long time. + BondLinkDown + //BondLinkBack link is going back. + BondLinkBack ) func (s BondSlaveMiiStatus) String() string { @@ -817,6 +1018,49 @@ func (b *BondSlave) SlaveType() string { return "bond" } +type VrfSlave struct { + Table uint32 +} + +func (v *VrfSlave) SlaveType() string { + return "vrf" +} + +// Geneve devices must specify RemoteIP and ID (VNI) on create +// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/drivers/net/geneve.c#L1209-L1223 +type Geneve struct { + LinkAttrs + ID uint32 // vni + Remote net.IP + Ttl uint8 + Tos uint8 + Dport uint16 + UdpCsum uint8 + UdpZeroCsum6Tx uint8 + UdpZeroCsum6Rx uint8 + Link uint32 + FlowBased bool + InnerProtoInherit bool + Df GeneveDf +} + +func (geneve *Geneve) Attrs() *LinkAttrs { + return &geneve.LinkAttrs +} + +func (geneve *Geneve) Type() string { + return "geneve" +} + +type GeneveDf uint8 + +const ( + GENEVE_DF_UNSET GeneveDf = iota + GENEVE_DF_SET + GENEVE_DF_INHERIT + GENEVE_DF_MAX +) + // Gretap devices must specify LocalIP and RemoteIP on create type Gretap struct { LinkAttrs @@ -861,6 +1105,7 @@ type Iptun struct { EncapType uint16 EncapFlags uint16 FlowBased bool + Proto uint8 } func (iptun *Iptun) Attrs() *LinkAttrs { @@ -878,10 +1123,15 @@ type Ip6tnl struct { Remote net.IP Ttl uint8 Tos uint8 - EncapLimit uint8 Flags uint32 Proto uint8 FlowInfo uint32 + EncapLimit uint8 + EncapType uint16 + EncapFlags uint16 + EncapSport uint16 + EncapDport uint16 + FlowBased bool } func (ip6tnl *Ip6tnl) Attrs() *LinkAttrs { @@ -892,14 +1142,47 @@ func (ip6tnl *Ip6tnl) Type() string { return "ip6tnl" } +// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L84 +type TunnelEncapType uint16 + +const ( + None TunnelEncapType = iota + FOU + GUE +) + +// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L91 +type TunnelEncapFlag uint16 + +const ( + CSum TunnelEncapFlag = 1 << 0 + CSum6 = 1 << 1 + RemCSum = 1 << 2 +) + +// from https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/ip6_tunnel.h#L12 +type IP6TunnelFlag uint16 + +const ( + IP6_TNL_F_IGN_ENCAP_LIMIT IP6TunnelFlag = 1 // don't add encapsulation limit if one isn't present in inner packet + IP6_TNL_F_USE_ORIG_TCLASS = 2 // copy the traffic class field from the inner packet + IP6_TNL_F_USE_ORIG_FLOWLABEL = 4 // copy the flowlabel from the inner packet + IP6_TNL_F_MIP6_DEV = 8 // being used for Mobile IPv6 + IP6_TNL_F_RCV_DSCP_COPY = 10 // copy DSCP from the outer packet + IP6_TNL_F_USE_ORIG_FWMARK = 20 // copy fwmark from inner packet + IP6_TNL_F_ALLOW_LOCAL_REMOTE = 40 // allow remote endpoint on the local node +) + type Sittun struct { LinkAttrs Link uint32 - Local net.IP - Remote net.IP Ttl uint8 Tos uint8 PMtuDisc uint8 + Proto uint8 + Local net.IP + Remote net.IP + EncapLimit uint8 EncapType uint16 EncapFlags uint16 EncapSport uint16 @@ -950,6 +1233,7 @@ type Gretun struct { EncapFlags uint16 EncapSport uint16 EncapDport uint16 + FlowBased bool } func (gretun *Gretun) Attrs() *LinkAttrs { @@ -993,6 +1277,7 @@ func (gtp *GTP) Type() string { } // Virtual XFRM Interfaces +// // Named "xfrmi" to prevent confusion with XFRM objects type Xfrmi struct { LinkAttrs @@ -1034,6 +1319,58 @@ var StringToIPoIBMode = map[string]IPoIBMode{ "connected": IPOIB_MODE_CONNECTED, } +const ( + CAN_STATE_ERROR_ACTIVE = iota + CAN_STATE_ERROR_WARNING + CAN_STATE_ERROR_PASSIVE + CAN_STATE_BUS_OFF + CAN_STATE_STOPPED + CAN_STATE_SLEEPING +) + +type Can struct { + LinkAttrs + + BitRate uint32 + SamplePoint uint32 + TimeQuanta uint32 + PropagationSegment uint32 + PhaseSegment1 uint32 + PhaseSegment2 uint32 + SyncJumpWidth uint32 + BitRatePreScaler uint32 + + Name string + TimeSegment1Min uint32 + TimeSegment1Max uint32 + TimeSegment2Min uint32 + TimeSegment2Max uint32 + SyncJumpWidthMax uint32 + BitRatePreScalerMin uint32 + BitRatePreScalerMax uint32 + BitRatePreScalerInc uint32 + + ClockFrequency uint32 + + State uint32 + + Mask uint32 + Flags uint32 + + TxError uint16 + RxError uint16 + + RestartMs uint32 +} + +func (can *Can) Attrs() *LinkAttrs { + return &can.LinkAttrs +} + +func (can *Can) Type() string { + return "can" +} + type IPoIB struct { LinkAttrs Pkey uint16 @@ -1049,11 +1386,27 @@ func (ipoib *IPoIB) Type() string { return "ipoib" } +type BareUDP struct { + LinkAttrs + Port uint16 + EtherType uint16 + SrcPortMin uint16 + MultiProto bool +} + +func (bareudp *BareUDP) Attrs() *LinkAttrs { + return &bareudp.LinkAttrs +} + +func (bareudp *BareUDP) Type() string { + return "bareudp" +} + // iproute2 supported devices; // vlan | veth | vcan | dummy | ifb | macvlan | macvtap | // bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan | // gre | gretap | ip6gre | ip6gretap | vti | vti6 | nlmon | -// bond_slave | ipvlan | xfrm +// bond_slave | ipvlan | xfrm | bareudp // LinkNotFoundError wraps the various not found errors when // getting/reading links. This is intended for better error diff --git a/vendor/github.com/vishvananda/netlink/link_linux.go b/vendor/github.com/vishvananda/netlink/link_linux.go index ec915a0b979..d713612a907 100644 --- a/vendor/github.com/vishvananda/netlink/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/link_linux.go @@ -34,14 +34,27 @@ const ( TUNTAP_MULTI_QUEUE_DEFAULTS TuntapFlag = TUNTAP_MULTI_QUEUE | TUNTAP_NO_PI ) +var StringToTuntapModeMap = map[string]TuntapMode{ + "tun": TUNTAP_MODE_TUN, + "tap": TUNTAP_MODE_TAP, +} + +func (ttm TuntapMode) String() string { + switch ttm { + case TUNTAP_MODE_TUN: + return "tun" + case TUNTAP_MODE_TAP: + return "tap" + } + return "unknown" +} + const ( VF_LINK_STATE_AUTO uint32 = 0 VF_LINK_STATE_ENABLE uint32 = 1 VF_LINK_STATE_DISABLE uint32 = 2 ) -var lookupByDump = false - var macvlanModes = [...]uint32{ 0, nl.MACVLAN_MODE_PRIVATE, @@ -138,7 +151,6 @@ func (h *Handle) LinkSetAllmulticastOn(link Link) error { msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Change = unix.IFF_ALLMULTI msg.Flags = unix.IFF_ALLMULTI - msg.Index = int32(base.Index) req.AddData(msg) @@ -168,6 +180,51 @@ func (h *Handle) LinkSetAllmulticastOff(link Link) error { return err } +// LinkSetMulticastOn enables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast on` +func LinkSetMulticastOn(link Link) error { + return pkgHandle.LinkSetMulticastOn(link) +} + +// LinkSetMulticastOn enables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast on` +func (h *Handle) LinkSetMulticastOn(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_MULTICAST + msg.Flags = unix.IFF_MULTICAST + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetAllmulticastOff disables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast off` +func LinkSetMulticastOff(link Link) error { + return pkgHandle.LinkSetMulticastOff(link) +} + +// LinkSetAllmulticastOff disables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast off` +func (h *Handle) LinkSetMulticastOff(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_MULTICAST + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + func MacvlanMACAddrAdd(link Link, addr net.HardwareAddr) error { return pkgHandle.MacvlanMACAddrAdd(link, addr) } @@ -237,6 +294,37 @@ func (h *Handle) macvlanMACAddrChange(link Link, addrs []net.HardwareAddr, mode return err } +// LinkSetMacvlanMode sets the mode of a macvlan or macvtap link device. +// Note that passthrough mode cannot be set to and from and will fail. +// Equivalent to: `ip link set $link type (macvlan|macvtap) mode $mode +func LinkSetMacvlanMode(link Link, mode MacvlanMode) error { + return pkgHandle.LinkSetMacvlanMode(link, mode) +} + +// LinkSetMacvlanMode sets the mode of the macvlan or macvtap link device. +// Note that passthrough mode cannot be set to and from and will fail. +// Equivalent to: `ip link set $link type (macvlan|macvtap) mode $mode +func (h *Handle) LinkSetMacvlanMode(link Link, mode MacvlanMode) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil) + linkInfo.AddRtAttr(nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) + + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[mode])) + + req.AddData(linkInfo) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + func BridgeSetMcastSnoop(link Link, on bool) error { return pkgHandle.BridgeSetMcastSnoop(link, on) } @@ -247,6 +335,26 @@ func (h *Handle) BridgeSetMcastSnoop(link Link, on bool) error { return h.linkModify(bridge, unix.NLM_F_ACK) } +func BridgeSetVlanFiltering(link Link, on bool) error { + return pkgHandle.BridgeSetVlanFiltering(link, on) +} + +func (h *Handle) BridgeSetVlanFiltering(link Link, on bool) error { + bridge := link.(*Bridge) + bridge.VlanFiltering = &on + return h.linkModify(bridge, unix.NLM_F_ACK) +} + +func BridgeSetVlanDefaultPVID(link Link, pvid uint16) error { + return pkgHandle.BridgeSetVlanDefaultPVID(link, pvid) +} + +func (h *Handle) BridgeSetVlanDefaultPVID(link Link, pvid uint16) error { + bridge := link.(*Bridge) + bridge.VlanDefaultPVID = &pvid + return h.linkModify(bridge, unix.NLM_F_ACK) +} + func SetPromiscOn(link Link) error { return pkgHandle.SetPromiscOn(link) } @@ -389,6 +497,58 @@ func (h *Handle) LinkSetAlias(link Link, name string) error { return err } +// LinkAddAltName adds a new alternative name for the link device. +// Equivalent to: `ip link property add $link altname $name` +func LinkAddAltName(link Link, name string) error { + return pkgHandle.LinkAddAltName(link, name) +} + +// LinkAddAltName adds a new alternative name for the link device. +// Equivalent to: `ip link property add $link altname $name` +func (h *Handle) LinkAddAltName(link Link, name string) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINKPROP, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_PROP_LIST|unix.NLA_F_NESTED, nil) + data.AddRtAttr(unix.IFLA_ALT_IFNAME, []byte(name)) + + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkDelAltName delete an alternative name for the link device. +// Equivalent to: `ip link property del $link altname $name` +func LinkDelAltName(link Link, name string) error { + return pkgHandle.LinkDelAltName(link, name) +} + +// LinkDelAltName delete an alternative name for the link device. +// Equivalent to: `ip link property del $link altname $name` +func (h *Handle) LinkDelAltName(link Link, name string) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_DELLINKPROP, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_PROP_LIST|unix.NLA_F_NESTED, nil) + data.AddRtAttr(unix.IFLA_ALT_IFNAME, []byte(name)) + + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + // LinkSetHardwareAddr sets the hardware address of the link device. // Equivalent to: `ip link set $link address $hwaddr` func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error { @@ -491,13 +651,50 @@ func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { req.AddData(msg) data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) - info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) vfmsg := nl.VfVlan{ Vf: uint32(vf), Vlan: uint32(vlan), Qos: uint32(qos), } - nl.NewRtAttrChild(info, nl.IFLA_VF_VLAN, vfmsg.Serialize()) + info.AddRtAttr(nl.IFLA_VF_VLAN, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfVlanQosProto sets the vlan, qos and protocol of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos proto $proto` +func LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error { + return pkgHandle.LinkSetVfVlanQosProto(link, vf, vlan, qos, proto) +} + +// LinkSetVfVlanQosProto sets the vlan, qos and protocol of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos proto $proto` +func (h *Handle) LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + vfInfo := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfVlanList := vfInfo.AddRtAttr(nl.IFLA_VF_VLAN_LIST, nil) + + vfmsg := nl.VfVlanInfo{ + VfVlan: nl.VfVlan{ + Vf: uint32(vf), + Vlan: uint32(vlan), + Qos: uint32(qos), + }, + VlanProto: (uint16(proto)>>8)&0xFF | (uint16(proto)&0xFF)<<8, + } + + vfVlanList.AddRtAttr(nl.IFLA_VF_VLAN_INFO, vfmsg.Serialize()) req.AddData(data) _, err := req.Execute(unix.NETLINK_ROUTE, 0) @@ -848,6 +1045,141 @@ func LinkSetXdpFdWithFlags(link Link, fd, flags int) error { return err } +// LinkSetGSOMaxSegs sets the GSO maximum segment count of the link device. +// Equivalent to: `ip link set $link gso_max_segs $maxSegs` +func LinkSetGSOMaxSegs(link Link, maxSegs int) error { + return pkgHandle.LinkSetGSOMaxSegs(link, maxSegs) +} + +// LinkSetGSOMaxSegs sets the GSO maximum segment count of the link device. +// Equivalent to: `ip link set $link gso_max_segs $maxSegs` +func (h *Handle) LinkSetGSOMaxSegs(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GSO_MAX_SEGS, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGSOMaxSize sets the IPv6 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_max_size $maxSize` +func LinkSetGSOMaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGSOMaxSize(link, maxSize) +} + +// LinkSetGSOMaxSize sets the IPv6 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_max_size $maxSize` +func (h *Handle) LinkSetGSOMaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GSO_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGROMaxSize sets the IPv6 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_max_size $maxSize` +func LinkSetGROMaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGROMaxSize(link, maxSize) +} + +// LinkSetGROMaxSize sets the IPv6 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_max_size $maxSize` +func (h *Handle) LinkSetGROMaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GRO_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGSOIPv4MaxSize sets the IPv4 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_ipv4_max_size $maxSize` +func LinkSetGSOIPv4MaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGSOIPv4MaxSize(link, maxSize) +} + +// LinkSetGSOIPv4MaxSize sets the IPv4 GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_ipv4_max_size $maxSize` +func (h *Handle) LinkSetGSOIPv4MaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GSO_IPV4_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGROIPv4MaxSize sets the IPv4 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_ipv4_max_size $maxSize` +func LinkSetGROIPv4MaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGROIPv4MaxSize(link, maxSize) +} + +// LinkSetGROIPv4MaxSize sets the IPv4 GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_ipv4_max_size $maxSize` +func (h *Handle) LinkSetGROIPv4MaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GRO_IPV4_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + func boolAttr(val bool) []byte { var v uint8 if val { @@ -1005,8 +1337,8 @@ func addBondAttrs(bond *Bond, linkInfo *nl.RtAttr) { if bond.LpInterval >= 0 { data.AddRtAttr(nl.IFLA_BOND_LP_INTERVAL, nl.Uint32Attr(uint32(bond.LpInterval))) } - if bond.PackersPerSlave >= 0 { - data.AddRtAttr(nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PackersPerSlave))) + if bond.PacketsPerSlave >= 0 { + data.AddRtAttr(nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PacketsPerSlave))) } if bond.LacpRate >= 0 { data.AddRtAttr(nl.IFLA_BOND_AD_LACP_RATE, nl.Uint8Attr(uint8(bond.LacpRate))) @@ -1048,6 +1380,14 @@ func (h *Handle) LinkAdd(link Link) error { return h.linkModify(link, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) } +func LinkModify(link Link) error { + return pkgHandle.LinkModify(link) +} + +func (h *Handle) LinkModify(link Link) error { + return h.linkModify(link, unix.NLM_F_REQUEST|unix.NLM_F_ACK) +} + func (h *Handle) linkModify(link Link, flags int) error { // TODO: support extra data for macvlan base := link.Attrs() @@ -1060,8 +1400,6 @@ func (h *Handle) linkModify(link Link, flags int) error { } if isTuntap { - // TODO: support user - // TODO: support group if tuntap.Mode < unix.IFF_TUN || tuntap.Mode > unix.IFF_TAP { return fmt.Errorf("Tuntap.Mode %v unknown", tuntap.Mode) } @@ -1089,21 +1427,64 @@ func (h *Handle) linkModify(link Link, flags int) error { } req.Flags |= uint16(tuntap.Mode) - + const TUN = "/dev/net/tun" for i := 0; i < queues; i++ { localReq := req - file, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0) + fd, err := unix.Open(TUN, os.O_RDWR|syscall.O_CLOEXEC, 0) if err != nil { cleanupFds(fds) return err } - fds = append(fds, file) - _, _, errno := unix.Syscall(unix.SYS_IOCTL, file.Fd(), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&localReq))) + _, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&localReq))) if errno != 0 { + // close the new fd + unix.Close(fd) + // and the already opened ones cleanupFds(fds) return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed [%d], errno %v", i, errno) } + + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETOWNER, uintptr(tuntap.Owner)) + if errno != 0 { + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETOWNER failed [%d], errno %v", i, errno) + } + + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETGROUP, uintptr(tuntap.Group)) + if errno != 0 { + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETGROUP failed [%d], errno %v", i, errno) + } + + // Set the tun device to non-blocking before use. The below comment + // taken from: + // + // https://github.com/mistsys/tuntap/commit/161418c25003bbee77d085a34af64d189df62bea + // + // Note there is a complication because in go, if a device node is + // opened, go sets it to use nonblocking I/O. However a /dev/net/tun + // doesn't work with epoll until after the TUNSETIFF ioctl has been + // done. So we open the unix fd directly, do the ioctl, then put the + // fd in nonblocking mode, an then finally wrap it in a os.File, + // which will see the nonblocking mode and add the fd to the + // pollable set, so later on when we Read() from it blocked the + // calling thread in the kernel. + // + // See + // https://github.com/golang/go/issues/30426 + // which got exposed in go 1.13 by the fix to + // https://github.com/golang/go/issues/30624 + err = unix.SetNonblock(fd, true) + if err != nil { + cleanupFds(fds) + return fmt.Errorf("Tuntap set to non-blocking failed [%d], err %v", i, err) + } + + // create the file from the file descriptor and store it + file := os.NewFile(uintptr(fd), TUN) + fds = append(fds, file) + // 1) we only care for the name of the first tap in the multi queue set // 2) if the original name was empty, the localReq has now the actual name // @@ -1114,11 +1495,29 @@ func (h *Handle) linkModify(link Link, flags int) error { if i == 0 { link.Attrs().Name = strings.Trim(string(localReq.Name[:]), "\x00") } + + } + + control := func(file *os.File, f func(fd uintptr)) error { + name := file.Name() + conn, err := file.SyscallConn() + if err != nil { + return fmt.Errorf("SyscallConn() failed on %s: %v", name, err) + } + if err := conn.Control(f); err != nil { + return fmt.Errorf("Failed to get file descriptor for %s: %v", name, err) + } + return nil } // only persist interface if NonPersist is NOT set if !tuntap.NonPersist { - _, _, errno := unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 1) + var errno syscall.Errno + if err := control(fds[0], func(fd uintptr) { + _, _, errno = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 1) + }); err != nil { + return err + } if errno != 0 { cleanupFds(fds) return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno) @@ -1135,7 +1534,10 @@ func (h *Handle) linkModify(link Link, flags int) error { // un-persist (e.g. allow the interface to be removed) the tuntap // should not hurt if not set prior, condition might be not needed if !tuntap.NonPersist { - _, _, _ = unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 0) + // ignore error + _ = control(fds[0], func(fd uintptr) { + _, _, _ = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 0) + }) } cleanupFds(fds) return err @@ -1193,6 +1595,11 @@ func (h *Handle) linkModify(link Link, flags int) error { nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(base.Name)) req.AddData(nameData) + if base.Alias != "" { + alias := nl.NewRtAttr(unix.IFLA_IFALIAS, []byte(base.Alias)) + req.AddData(alias) + } + if base.MTU > 0 { mtu := nl.NewRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) req.AddData(mtu) @@ -1228,6 +1635,21 @@ func (h *Handle) linkModify(link Link, flags int) error { req.AddData(gsoAttr) } + if base.GROMaxSize > 0 { + groAttr := nl.NewRtAttr(unix.IFLA_GRO_MAX_SIZE, nl.Uint32Attr(base.GROMaxSize)) + req.AddData(groAttr) + } + + if base.GSOIPv4MaxSize > 0 { + gsoAttr := nl.NewRtAttr(unix.IFLA_GSO_IPV4_MAX_SIZE, nl.Uint32Attr(base.GSOIPv4MaxSize)) + req.AddData(gsoAttr) + } + + if base.GROIPv4MaxSize > 0 { + groAttr := nl.NewRtAttr(unix.IFLA_GRO_IPV4_MAX_SIZE, nl.Uint32Attr(base.GROIPv4MaxSize)) + req.AddData(groAttr) + } + if base.Group > 0 { groupAttr := nl.NewRtAttr(unix.IFLA_GROUP, nl.Uint32Attr(base.Group)) req.AddData(groupAttr) @@ -1264,6 +1686,10 @@ func (h *Handle) linkModify(link Link, flags int) error { if link.VlanProtocol != VLAN_PROTOCOL_UNKNOWN { data.AddRtAttr(nl.IFLA_VLAN_PROTOCOL, htons(uint16(link.VlanProtocol))) } + case *Netkit: + if err := addNetkitAttrs(link, linkInfo, flags); err != nil { + return err + } case *Veth: data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) peer := data.AddRtAttr(nl.VETH_INFO_PEER, nil) @@ -1272,12 +1698,28 @@ func (h *Handle) linkModify(link Link, flags int) error { if base.TxQLen >= 0 { peer.AddRtAttr(unix.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) } + if base.NumTxQueues > 0 { + peer.AddRtAttr(unix.IFLA_NUM_TX_QUEUES, nl.Uint32Attr(uint32(base.NumTxQueues))) + } + if base.NumRxQueues > 0 { + peer.AddRtAttr(unix.IFLA_NUM_RX_QUEUES, nl.Uint32Attr(uint32(base.NumRxQueues))) + } if base.MTU > 0 { peer.AddRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) } if link.PeerHardwareAddr != nil { peer.AddRtAttr(unix.IFLA_ADDRESS, []byte(link.PeerHardwareAddr)) } + if link.PeerNamespace != nil { + switch ns := link.PeerNamespace.(type) { + case NsPid: + val := nl.Uint32Attr(uint32(ns)) + peer.AddRtAttr(unix.IFLA_NET_NS_PID, val) + case NsFd: + val := nl.Uint32Attr(uint32(ns)) + peer.AddRtAttr(unix.IFLA_NET_NS_FD, val) + } + } case *Vxlan: addVxlanAttrs(link, linkInfo) case *Bond: @@ -1286,16 +1728,16 @@ func (h *Handle) linkModify(link Link, flags int) error { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode))) data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag))) + case *IPVtap: + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode))) + data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag))) case *Macvlan: - if link.Mode != MACVLAN_MODE_DEFAULT { - data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) - data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) - } + addMacvlanAttrs(link, linkInfo) case *Macvtap: - if link.Mode != MACVLAN_MODE_DEFAULT { - data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) - data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) - } + addMacvtapAttrs(link, linkInfo) + case *Geneve: + addGeneveAttrs(link, linkInfo) case *Gretap: addGretapAttrs(link, linkInfo) case *Iptun: @@ -1318,6 +1760,8 @@ func (h *Handle) linkModify(link Link, flags int) error { addXfrmiAttrs(link, linkInfo) case *IPoIB: addIPoIBAttrs(link, linkInfo) + case *BareUDP: + addBareUDPAttrs(link, linkInfo) } req.AddData(linkInfo) @@ -1372,6 +1816,13 @@ func (h *Handle) linkByNameDump(name string) (Link, error) { if link.Attrs().Name == name { return link, nil } + + // support finding interfaces also via altnames + for _, altName := range link.Attrs().AltNames { + if altName == name { + return link, nil + } + } } return nil, LinkNotFoundError{fmt.Errorf("Link %s not found", name)} } @@ -1410,6 +1861,9 @@ func (h *Handle) LinkByName(name string) (Link, error) { req.AddData(attr) nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(name)) + if len(name) > 15 { + nameData = nl.NewRtAttr(unix.IFLA_ALT_IFNAME, nl.ZeroTerminated(name)) + } req.AddData(nameData) link, err := execGetLink(req) @@ -1499,7 +1953,7 @@ func execGetLink(req *nl.NetlinkRequest) (Link, error) { } } -// linkDeserialize deserializes a raw message received from netlink into +// LinkDeserialize deserializes a raw message received from netlink into // a link object. func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { msg := nl.DeserializeIfInfomsg(m) @@ -1509,10 +1963,19 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { return nil, err } - base := LinkAttrs{Index: int(msg.Index), RawFlags: msg.Flags, Flags: linkFlags(msg.Flags), EncapType: msg.EncapType()} - if msg.Flags&unix.IFF_PROMISC != 0 { - base.Promisc = 1 + base := NewLinkAttrs() + base.Index = int(msg.Index) + base.RawFlags = msg.Flags + base.Flags = linkFlags(msg.Flags) + base.EncapType = msg.EncapType() + base.NetNsID = -1 + if msg.Flags&unix.IFF_ALLMULTI != 0 { + base.Allmulti = 1 } + if msg.Flags&unix.IFF_MULTICAST != 0 { + base.Multi = 1 + } + var ( link Link stats32 *LinkStatistics32 @@ -1541,18 +2004,26 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { link = &Bridge{} case "vlan": link = &Vlan{} + case "netkit": + link = &Netkit{} case "veth": link = &Veth{} + case "wireguard": + link = &Wireguard{} case "vxlan": link = &Vxlan{} case "bond": link = &Bond{} case "ipvlan": link = &IPVlan{} + case "ipvtap": + link = &IPVtap{} case "macvlan": link = &Macvlan{} case "macvtap": link = &Macvtap{} + case "geneve": + link = &Geneve{} case "gretap": link = &Gretap{} case "ip6gretap": @@ -1579,6 +2050,10 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { link = &Tuntap{} case "ipoib": link = &IPoIB{} + case "can": + link = &Can{} + case "bareudp": + link = &BareUDP{} default: link = &GenericLink{LinkType: linkType} } @@ -1588,6 +2063,8 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { return nil, err } switch linkType { + case "netkit": + parseNetkitData(link, data) case "vlan": parseVlanData(link, data) case "vxlan": @@ -1596,10 +2073,14 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { parseBondData(link, data) case "ipvlan": parseIPVlanData(link, data) + case "ipvtap": + parseIPVtapData(link, data) case "macvlan": parseMacvlanData(link, data) case "macvtap": parseMacvtapData(link, data) + case "geneve": + parseGeneveData(link, data) case "gretap": parseGretapData(link, data) case "ip6gretap": @@ -1628,13 +2109,21 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { parseTuntapData(link, data) case "ipoib": parseIPoIBData(link, data) + case "can": + parseCanData(link, data) + case "bareudp": + parseBareUDPData(link, data) } + case nl.IFLA_INFO_SLAVE_KIND: slaveType = string(info.Value[:len(info.Value)-1]) switch slaveType { case "bond": linkSlave = &BondSlave{} + case "vrf": + linkSlave = &VrfSlave{} } + case nl.IFLA_INFO_SLAVE_DATA: switch slaveType { case "bond": @@ -1643,6 +2132,12 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { return nil, err } parseBondSlaveData(linkSlave, data) + case "vrf": + data, err := nl.ParseRouteAttr(info.Value) + if err != nil { + return nil, err + } + parseVrfSlaveData(linkSlave, data) } } } @@ -1660,6 +2155,8 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { base.Name = string(attr.Value[:len(attr.Value)-1]) case unix.IFLA_MTU: base.MTU = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_PROMISCUITY: + base.Promisc = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_LINK: base.ParentIndex = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_MASTER: @@ -1694,14 +2191,38 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { protinfo := parseProtinfo(attrs) base.Protinfo = &protinfo } + case unix.IFLA_PROP_LIST | unix.NLA_F_NESTED: + attrs, err := nl.ParseRouteAttr(attr.Value[:]) + if err != nil { + return nil, err + } + + base.AltNames = []string{} + for _, attr := range attrs { + if attr.Attr.Type == unix.IFLA_ALT_IFNAME { + base.AltNames = append(base.AltNames, nl.BytesToString(attr.Value)) + } + } case unix.IFLA_OPERSTATE: base.OperState = LinkOperState(uint8(attr.Value[0])) + case unix.IFLA_PHYS_SWITCH_ID: + base.PhysSwitchID = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_LINK_NETNSID: base.NetNsID = int(native.Uint32(attr.Value[0:4])) - case unix.IFLA_GSO_MAX_SIZE: - base.GSOMaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_TSO_MAX_SEGS: + base.TSOMaxSegs = native.Uint32(attr.Value[0:4]) + case unix.IFLA_TSO_MAX_SIZE: + base.TSOMaxSize = native.Uint32(attr.Value[0:4]) case unix.IFLA_GSO_MAX_SEGS: base.GSOMaxSegs = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GSO_MAX_SIZE: + base.GSOMaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GRO_MAX_SIZE: + base.GROMaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GSO_IPV4_MAX_SIZE: + base.GSOIPv4MaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GRO_IPV4_MAX_SIZE: + base.GROIPv4MaxSize = native.Uint32(attr.Value[0:4]) case unix.IFLA_VFINFO_LIST: data, err := nl.ParseRouteAttr(attr.Value) if err != nil { @@ -1718,6 +2239,13 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { base.NumRxQueues = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_GROUP: base.Group = native.Uint32(attr.Value[0:4]) + case unix.IFLA_PERM_ADDRESS: + for _, b := range attr.Value { + if b != 0 { + base.PermHWAddr = attr.Value[:] + break + } + } } } @@ -1830,21 +2358,24 @@ type LinkUpdate struct { // LinkSubscribe takes a chan down which notifications will be sent // when links change. Close the 'done' chan to stop subscription. func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error { - return linkSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) + return linkSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // LinkSubscribeAt works like LinkSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func LinkSubscribeAt(ns netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error { - return linkSubscribeAt(ns, netns.None(), ch, done, nil, false) + return linkSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // LinkSubscribeOptions contains a set of options to use with // LinkSubscribeWithOptions. type LinkSubscribeOptions struct { - Namespace *netns.NsHandle - ErrorCallback func(error) - ListExisting bool + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // LinkSubscribeWithOptions work like LinkSubscribe but enable to @@ -1855,14 +2386,27 @@ func LinkSubscribeWithOptions(ch chan<- LinkUpdate, done <-chan struct{}, option none := netns.None() options.Namespace = &none } - return linkSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) + return linkSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { +func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvbufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_LINK) if err != nil { return err } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + if rcvbuf != 0 { + err = s.SetReceiveBufferSize(rcvbuf, rcvbufForce) + if err != nil { + return err + } + } if done != nil { go func() { <-done @@ -1884,7 +2428,8 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c msgs, from, err := s.Receive() if err != nil { if cberr != nil { - cberr(err) + cberr(fmt.Errorf("Receive failed: %v", + err)) } return } @@ -1899,15 +2444,15 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c continue } if m.Header.Type == unix.NLMSG_ERROR { - native := nl.NativeEndian() error := int32(native.Uint32(m.Data[0:4])) if error == 0 { continue } if cberr != nil { - cberr(syscall.Errno(-error)) + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) } - return + continue } ifmsg := nl.DeserializeIfInfomsg(m.Data) header := unix.NlMsghdr(m.Header) @@ -1916,7 +2461,7 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c if cberr != nil { cberr(err) } - return + continue } ch <- LinkUpdate{IfInfomsg: *ifmsg, Header: header, Link: link} } @@ -1942,6 +2487,16 @@ func (h *Handle) LinkSetGuard(link Link, mode bool) error { return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_GUARD) } +// LinkSetBRSlaveGroupFwdMask set the group_fwd_mask of a bridge slave interface +func LinkSetBRSlaveGroupFwdMask(link Link, mask uint16) error { + return pkgHandle.LinkSetBRSlaveGroupFwdMask(link, mask) +} + +// LinkSetBRSlaveGroupFwdMask set the group_fwd_mask of a bridge slave interface +func (h *Handle) LinkSetBRSlaveGroupFwdMask(link Link, mask uint16) error { + return h.setProtinfoAttrRawVal(link, nl.Uint16Attr(mask), nl.IFLA_BRPORT_GROUP_FWD_MASK) +} + func LinkSetFastLeave(link Link, mode bool) error { return pkgHandle.LinkSetFastLeave(link, mode) } @@ -1974,6 +2529,14 @@ func (h *Handle) LinkSetFlood(link Link, mode bool) error { return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_UNICAST_FLOOD) } +func LinkSetIsolated(link Link, mode bool) error { + return pkgHandle.LinkSetIsolated(link, mode) +} + +func (h *Handle) LinkSetIsolated(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_ISOLATED) +} + func LinkSetBrProxyArp(link Link, mode bool) error { return pkgHandle.LinkSetBrProxyArp(link, mode) } @@ -1990,7 +2553,15 @@ func (h *Handle) LinkSetBrProxyArpWiFi(link Link, mode bool) error { return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_PROXYARP_WIFI) } -func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { +func LinkSetBrNeighSuppress(link Link, mode bool) error { + return pkgHandle.LinkSetBrNeighSuppress(link, mode) +} + +func (h *Handle) LinkSetBrNeighSuppress(link Link, mode bool) error { + return h.setProtinfoAttr(link, mode, nl.IFLA_BRPORT_NEIGH_SUPPRESS) +} + +func (h *Handle) setProtinfoAttrRawVal(link Link, val []byte, attr int) error { base := link.Attrs() h.ensureIndex(base) req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) @@ -2000,7 +2571,7 @@ func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { req.AddData(msg) br := nl.NewRtAttr(unix.IFLA_PROTINFO|unix.NLA_F_NESTED, nil) - br.AddRtAttr(attr, boolToByte(mode)) + br.AddRtAttr(attr, val) req.AddData(br) _, err := req.Execute(unix.NETLINK_ROUTE, 0) if err != nil { @@ -2008,6 +2579,9 @@ func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { } return nil } +func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { + return h.setProtinfoAttrRawVal(link, boolToByte(mode), attr) +} // LinkSetTxQLen sets the transaction queue length for the link. // Equivalent to: `ip link set $link txqlen $qlen` @@ -2065,6 +2639,80 @@ func (h *Handle) LinkSetGroup(link Link, group int) error { return err } +func addNetkitAttrs(nk *Netkit, linkInfo *nl.RtAttr, flag int) error { + if nk.peerLinkAttrs.HardwareAddr != nil || nk.HardwareAddr != nil { + return fmt.Errorf("netkit doesn't support setting Ethernet") + } + + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + // Kernel will return error if trying to change the mode of an existing netkit device + data.AddRtAttr(nl.IFLA_NETKIT_MODE, nl.Uint32Attr(uint32(nk.Mode))) + data.AddRtAttr(nl.IFLA_NETKIT_POLICY, nl.Uint32Attr(uint32(nk.Policy))) + data.AddRtAttr(nl.IFLA_NETKIT_PEER_POLICY, nl.Uint32Attr(uint32(nk.PeerPolicy))) + + if (flag & unix.NLM_F_EXCL) == 0 { + // Modifying peer link attributes will not take effect + return nil + } + + peer := data.AddRtAttr(nl.IFLA_NETKIT_PEER_INFO, nil) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + if nk.peerLinkAttrs.Flags&net.FlagUp != 0 { + msg.Change = unix.IFF_UP + msg.Flags = unix.IFF_UP + } + if nk.peerLinkAttrs.Index != 0 { + msg.Index = int32(nk.peerLinkAttrs.Index) + } + peer.AddChild(msg) + if nk.peerLinkAttrs.Name != "" { + peer.AddRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(nk.peerLinkAttrs.Name)) + } + if nk.peerLinkAttrs.MTU > 0 { + peer.AddRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(nk.peerLinkAttrs.MTU))) + } + if nk.peerLinkAttrs.GSOMaxSegs > 0 { + peer.AddRtAttr(unix.IFLA_GSO_MAX_SEGS, nl.Uint32Attr(nk.peerLinkAttrs.GSOMaxSegs)) + } + if nk.peerLinkAttrs.GSOMaxSize > 0 { + peer.AddRtAttr(unix.IFLA_GSO_MAX_SIZE, nl.Uint32Attr(nk.peerLinkAttrs.GSOMaxSize)) + } + if nk.peerLinkAttrs.GSOIPv4MaxSize > 0 { + peer.AddRtAttr(unix.IFLA_GSO_IPV4_MAX_SIZE, nl.Uint32Attr(nk.peerLinkAttrs.GSOIPv4MaxSize)) + } + if nk.peerLinkAttrs.GROIPv4MaxSize > 0 { + peer.AddRtAttr(unix.IFLA_GRO_IPV4_MAX_SIZE, nl.Uint32Attr(nk.peerLinkAttrs.GROIPv4MaxSize)) + } + if nk.peerLinkAttrs.Namespace != nil { + switch ns := nk.peerLinkAttrs.Namespace.(type) { + case NsPid: + peer.AddRtAttr(unix.IFLA_NET_NS_PID, nl.Uint32Attr(uint32(ns))) + case NsFd: + peer.AddRtAttr(unix.IFLA_NET_NS_FD, nl.Uint32Attr(uint32(ns))) + } + } + return nil +} + +func parseNetkitData(link Link, data []syscall.NetlinkRouteAttr) { + netkit := link.(*Netkit) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_NETKIT_PRIMARY: + isPrimary := datum.Value[0:1][0] + if isPrimary != 0 { + netkit.isPrimary = true + } + case nl.IFLA_NETKIT_MODE: + netkit.Mode = NetkitMode(native.Uint32(datum.Value[0:4])) + case nl.IFLA_NETKIT_POLICY: + netkit.Policy = NetkitPolicy(native.Uint32(datum.Value[0:4])) + case nl.IFLA_NETKIT_PEER_POLICY: + netkit.PeerPolicy = NetkitPolicy(native.Uint32(datum.Value[0:4])) + } + } +} + func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) { vlan := link.(*Vlan) for _, datum := range data { @@ -2080,6 +2728,13 @@ func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) { func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) { vxlan := link.(*Vxlan) for _, datum := range data { + // NOTE(vish): Apparently some messages can be sent with no value. + // We special case GBP here to not change existing + // functionality. It appears that GBP sends a datum.Value + // of null. + if len(datum.Value) == 0 && datum.Attr.Type != nl.IFLA_VXLAN_GBP { + continue + } switch datum.Attr.Type { case nl.IFLA_VXLAN_ID: vxlan.VxlanId = int(native.Uint32(datum.Value[0:4])) @@ -2178,7 +2833,7 @@ func parseBondData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_BOND_LP_INTERVAL: bond.LpInterval = int(native.Uint32(data[i].Value[0:4])) case nl.IFLA_BOND_PACKETS_PER_SLAVE: - bond.PackersPerSlave = int(native.Uint32(data[i].Value[0:4])) + bond.PacketsPerSlave = int(native.Uint32(data[i].Value[0:4])) case nl.IFLA_BOND_AD_LACP_RATE: bond.LacpRate = BondLacpRate(data[i].Value[0]) case nl.IFLA_BOND_AD_SELECT: @@ -2258,6 +2913,16 @@ func parseBondSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) { } } +func parseVrfSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) { + vrfSlave := slave.(*VrfSlave) + for i := range data { + switch data[i].Attr.Type { + case nl.IFLA_BOND_SLAVE_STATE: + vrfSlave.Table = native.Uint32(data[i].Value[0:4]) + } + } +} + func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) { ipv := link.(*IPVlan) for _, datum := range data { @@ -2270,11 +2935,42 @@ func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) { } } +func parseIPVtapData(link Link, data []syscall.NetlinkRouteAttr) { + ipv := link.(*IPVtap) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_IPVLAN_MODE: + ipv.Mode = IPVlanMode(native.Uint32(datum.Value[0:4])) + case nl.IFLA_IPVLAN_FLAG: + ipv.Flag = IPVlanFlag(native.Uint32(datum.Value[0:4])) + } + } +} + +func addMacvtapAttrs(macvtap *Macvtap, linkInfo *nl.RtAttr) { + addMacvlanAttrs(&macvtap.Macvlan, linkInfo) +} + func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) { macv := link.(*Macvtap) parseMacvlanData(&macv.Macvlan, data) } +func addMacvlanAttrs(macvlan *Macvlan, linkInfo *nl.RtAttr) { + var data *nl.RtAttr + + if macvlan.Mode != MACVLAN_MODE_DEFAULT || macvlan.BCQueueLen > 0 { + data = linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + } + + if macvlan.Mode != MACVLAN_MODE_DEFAULT { + data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macvlan.Mode])) + } + if macvlan.BCQueueLen > 0 { + data.AddRtAttr(nl.IFLA_MACVLAN_BC_QUEUE_LEN, nl.Uint32Attr(macvlan.BCQueueLen)) + } +} + func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { macv := link.(*Macvlan) for _, datum := range data { @@ -2302,6 +2998,10 @@ func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { for _, macDatum := range macs { macv.MACAddrs = append(macv.MACAddrs, net.HardwareAddr(macDatum.Value[0:6])) } + case nl.IFLA_MACVLAN_BC_QUEUE_LEN: + macv.BCQueueLen = native.Uint32(datum.Value[0:4]) + case nl.IFLA_MACVLAN_BC_QUEUE_LEN_USED: + macv.UsedBCQueueLen = native.Uint32(datum.Value[0:4]) } } } @@ -2327,12 +3027,73 @@ func linkFlags(rawFlags uint32) net.Flags { return f } +func addGeneveAttrs(geneve *Geneve, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if geneve.InnerProtoInherit { + data.AddRtAttr(nl.IFLA_GENEVE_INNER_PROTO_INHERIT, []byte{}) + } + + if geneve.FlowBased { + geneve.ID = 0 + data.AddRtAttr(nl.IFLA_GENEVE_COLLECT_METADATA, []byte{}) + } + + if ip := geneve.Remote; ip != nil { + if ip4 := ip.To4(); ip4 != nil { + data.AddRtAttr(nl.IFLA_GENEVE_REMOTE, ip.To4()) + } else { + data.AddRtAttr(nl.IFLA_GENEVE_REMOTE6, []byte(ip)) + } + } + + if geneve.ID != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_ID, nl.Uint32Attr(geneve.ID)) + } + + if geneve.Dport != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_PORT, htons(geneve.Dport)) + } + + if geneve.Ttl != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_TTL, nl.Uint8Attr(geneve.Ttl)) + } + + if geneve.Tos != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_TOS, nl.Uint8Attr(geneve.Tos)) + } + + data.AddRtAttr(nl.IFLA_GENEVE_DF, nl.Uint8Attr(uint8(geneve.Df))) +} + +func parseGeneveData(link Link, data []syscall.NetlinkRouteAttr) { + geneve := link.(*Geneve) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_GENEVE_ID: + geneve.ID = native.Uint32(datum.Value[0:4]) + case nl.IFLA_GENEVE_REMOTE, nl.IFLA_GENEVE_REMOTE6: + geneve.Remote = datum.Value + case nl.IFLA_GENEVE_PORT: + geneve.Dport = ntohs(datum.Value[0:2]) + case nl.IFLA_GENEVE_TTL: + geneve.Ttl = uint8(datum.Value[0]) + case nl.IFLA_GENEVE_TOS: + geneve.Tos = uint8(datum.Value[0]) + case nl.IFLA_GENEVE_COLLECT_METADATA: + geneve.FlowBased = true + case nl.IFLA_GENEVE_INNER_PROTO_INHERIT: + geneve.InnerProtoInherit = true + } + } +} + func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) if gretap.FlowBased { // In flow based mode, no other attributes need to be configured - data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, boolAttr(gretap.FlowBased)) + data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, []byte{}) return } @@ -2415,6 +3176,12 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) { func addGretunAttrs(gre *Gretun, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if gre.FlowBased { + // In flow based mode, no other attributes need to be configured + data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, []byte{}) + return + } + if ip := gre.Local; ip != nil { if ip.To4() != nil { ip = ip.To4() @@ -2485,6 +3252,8 @@ func parseGretunData(link Link, data []syscall.NetlinkRouteAttr) { gre.EncapSport = ntohs(datum.Value[0:2]) case nl.IFLA_GRE_ENCAP_DPORT: gre.EncapDport = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_COLLECT_METADATA: + gre.FlowBased = true } } } @@ -2513,7 +3282,8 @@ func parseLinkXdp(data []byte) (*LinkXdp, error) { case nl.IFLA_XDP_FD: xdp.Fd = int(native.Uint32(attr.Value[0:4])) case nl.IFLA_XDP_ATTACHED: - xdp.Attached = attr.Value[0] != 0 + xdp.AttachMode = uint32(attr.Value[0]) + xdp.Attached = xdp.AttachMode != 0 case nl.IFLA_XDP_FLAGS: xdp.Flags = native.Uint32(attr.Value[0:4]) case nl.IFLA_XDP_PROG_ID: @@ -2524,14 +3294,14 @@ func parseLinkXdp(data []byte) (*LinkXdp, error) { } func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if iptun.FlowBased { // In flow based mode, no other attributes need to be configured - linkInfo.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, boolAttr(iptun.FlowBased)) + data.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, []byte{}) return } - data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) - ip := iptun.Local.To4() if ip != nil { data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip)) @@ -2552,6 +3322,7 @@ func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) { data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(iptun.EncapFlags)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(iptun.EncapSport)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(iptun.EncapDport)) + data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(iptun.Proto)) } func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { @@ -2577,7 +3348,9 @@ func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_IPTUN_ENCAP_FLAGS: iptun.EncapFlags = native.Uint16(datum.Value[0:2]) case nl.IFLA_IPTUN_COLLECT_METADATA: - iptun.FlowBased = int8(datum.Value[0]) != 0 + iptun.FlowBased = true + case nl.IFLA_IPTUN_PROTO: + iptun.Proto = datum.Value[0] } } } @@ -2585,6 +3358,12 @@ func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { func addIp6tnlAttrs(ip6tnl *Ip6tnl, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if ip6tnl.FlowBased { + // In flow based mode, no other attributes need to be configured + data.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, []byte{}) + return + } + if ip6tnl.Link != 0 { data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(ip6tnl.Link)) } @@ -2601,10 +3380,14 @@ func addIp6tnlAttrs(ip6tnl *Ip6tnl, linkInfo *nl.RtAttr) { data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(ip6tnl.Ttl)) data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(ip6tnl.Tos)) - data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(ip6tnl.EncapLimit)) data.AddRtAttr(nl.IFLA_IPTUN_FLAGS, nl.Uint32Attr(ip6tnl.Flags)) data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(ip6tnl.Proto)) data.AddRtAttr(nl.IFLA_IPTUN_FLOWINFO, nl.Uint32Attr(ip6tnl.FlowInfo)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(ip6tnl.EncapLimit)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(ip6tnl.EncapType)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(ip6tnl.EncapFlags)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(ip6tnl.EncapSport)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(ip6tnl.EncapDport)) } func parseIp6tnlData(link Link, data []syscall.NetlinkRouteAttr) { @@ -2616,17 +3399,27 @@ func parseIp6tnlData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_IPTUN_REMOTE: ip6tnl.Remote = net.IP(datum.Value[:16]) case nl.IFLA_IPTUN_TTL: - ip6tnl.Ttl = uint8(datum.Value[0]) + ip6tnl.Ttl = datum.Value[0] case nl.IFLA_IPTUN_TOS: - ip6tnl.Tos = uint8(datum.Value[0]) - case nl.IFLA_IPTUN_ENCAP_LIMIT: - ip6tnl.EncapLimit = uint8(datum.Value[0]) + ip6tnl.Tos = datum.Value[0] case nl.IFLA_IPTUN_FLAGS: ip6tnl.Flags = native.Uint32(datum.Value[:4]) case nl.IFLA_IPTUN_PROTO: - ip6tnl.Proto = uint8(datum.Value[0]) + ip6tnl.Proto = datum.Value[0] case nl.IFLA_IPTUN_FLOWINFO: ip6tnl.FlowInfo = native.Uint32(datum.Value[:4]) + case nl.IFLA_IPTUN_ENCAP_LIMIT: + ip6tnl.EncapLimit = datum.Value[0] + case nl.IFLA_IPTUN_ENCAP_TYPE: + ip6tnl.EncapType = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_FLAGS: + ip6tnl.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_SPORT: + ip6tnl.EncapSport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_DPORT: + ip6tnl.EncapDport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_COLLECT_METADATA: + ip6tnl.FlowBased = true } } } @@ -2653,8 +3446,10 @@ func addSittunAttrs(sittun *Sittun, linkInfo *nl.RtAttr) { data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(sittun.Ttl)) } + data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(sittun.Proto)) data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(sittun.Tos)) data.AddRtAttr(nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(sittun.PMtuDisc)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(sittun.EncapLimit)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(sittun.EncapType)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(sittun.EncapFlags)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(sittun.EncapSport)) @@ -2670,11 +3465,13 @@ func parseSittunData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_IPTUN_REMOTE: sittun.Remote = net.IP(datum.Value[0:4]) case nl.IFLA_IPTUN_TTL: - sittun.Ttl = uint8(datum.Value[0]) + sittun.Ttl = datum.Value[0] case nl.IFLA_IPTUN_TOS: - sittun.Tos = uint8(datum.Value[0]) + sittun.Tos = datum.Value[0] case nl.IFLA_IPTUN_PMTUDISC: - sittun.PMtuDisc = uint8(datum.Value[0]) + sittun.PMtuDisc = datum.Value[0] + case nl.IFLA_IPTUN_PROTO: + sittun.Proto = datum.Value[0] case nl.IFLA_IPTUN_ENCAP_TYPE: sittun.EncapType = native.Uint16(datum.Value[0:2]) case nl.IFLA_IPTUN_ENCAP_FLAGS: @@ -2761,18 +3558,30 @@ func addBridgeAttrs(bridge *Bridge, linkInfo *nl.RtAttr) { if bridge.MulticastSnooping != nil { data.AddRtAttr(nl.IFLA_BR_MCAST_SNOOPING, boolToByte(*bridge.MulticastSnooping)) } + if bridge.AgeingTime != nil { + data.AddRtAttr(nl.IFLA_BR_AGEING_TIME, nl.Uint32Attr(*bridge.AgeingTime)) + } if bridge.HelloTime != nil { data.AddRtAttr(nl.IFLA_BR_HELLO_TIME, nl.Uint32Attr(*bridge.HelloTime)) } if bridge.VlanFiltering != nil { data.AddRtAttr(nl.IFLA_BR_VLAN_FILTERING, boolToByte(*bridge.VlanFiltering)) } + if bridge.VlanDefaultPVID != nil { + data.AddRtAttr(nl.IFLA_BR_VLAN_DEFAULT_PVID, nl.Uint16Attr(*bridge.VlanDefaultPVID)) + } + if bridge.GroupFwdMask != nil { + data.AddRtAttr(nl.IFLA_BR_GROUP_FWD_MASK, nl.Uint16Attr(*bridge.GroupFwdMask)) + } } func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) { br := bridge.(*Bridge) for _, datum := range data { switch datum.Attr.Type { + case nl.IFLA_BR_AGEING_TIME: + ageingTime := native.Uint32(datum.Value[0:4]) + br.AgeingTime = &ageingTime case nl.IFLA_BR_HELLO_TIME: helloTime := native.Uint32(datum.Value[0:4]) br.HelloTime = &helloTime @@ -2782,6 +3591,12 @@ func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_BR_VLAN_FILTERING: vlanFiltering := datum.Value[0] == 1 br.VlanFiltering = &vlanFiltering + case nl.IFLA_BR_VLAN_DEFAULT_PVID: + vlanDefaultPVID := native.Uint16(datum.Value[0:2]) + br.VlanDefaultPVID = &vlanDefaultPVID + case nl.IFLA_BR_GROUP_FWD_MASK: + mask := native.Uint16(datum.Value[0:2]) + br.GroupFwdMask = &mask } } } @@ -2823,12 +3638,17 @@ func parseVfInfoList(data []syscall.NetlinkRouteAttr) ([]VfInfo, error) { if err != nil { return nil, err } - vfs = append(vfs, parseVfInfo(vfAttrs, i)) + + vf, err := parseVfInfo(vfAttrs, i) + if err != nil { + return nil, err + } + vfs = append(vfs, vf) } return vfs, nil } -func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { +func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) (VfInfo, error) { vf := VfInfo{ID: id} for _, element := range data { switch element.Attr.Type { @@ -2839,6 +3659,12 @@ func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { vl := nl.DeserializeVfVlan(element.Value[:]) vf.Vlan = int(vl.Vlan) vf.Qos = int(vl.Qos) + case nl.IFLA_VF_VLAN_LIST: + vfVlanInfoList, err := nl.DeserializeVfVlanList(element.Value[:]) + if err != nil { + return vf, err + } + vf.VlanProto = int(vfVlanInfoList[0].VlanProto) case nl.IFLA_VF_TX_RATE: txr := nl.DeserializeVfTxRate(element.Value[:]) vf.TxRate = int(txr.Rate) @@ -2852,16 +3678,35 @@ func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { vfr := nl.DeserializeVfRate(element.Value[:]) vf.MaxTxRate = vfr.MaxTxRate vf.MinTxRate = vfr.MinTxRate + case nl.IFLA_VF_STATS: + vfstats := nl.DeserializeVfStats(element.Value[:]) + vf.RxPackets = vfstats.RxPackets + vf.TxPackets = vfstats.TxPackets + vf.RxBytes = vfstats.RxBytes + vf.TxBytes = vfstats.TxBytes + vf.Multicast = vfstats.Multicast + vf.Broadcast = vfstats.Broadcast + vf.RxDropped = vfstats.RxDropped + vf.TxDropped = vfstats.TxDropped + + case nl.IFLA_VF_RSS_QUERY_EN: + result := nl.DeserializeVfRssQueryEn(element.Value) + vf.RssQuery = result.Setting + + case nl.IFLA_VF_TRUST: + result := nl.DeserializeVfTrust(element.Value) + vf.Trust = result.Setting } } - return vf + return vf, nil } func addXfrmiAttrs(xfrmi *Xfrmi, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) data.AddRtAttr(nl.IFLA_XFRM_LINK, nl.Uint32Attr(uint32(xfrmi.ParentIndex))) - data.AddRtAttr(nl.IFLA_XFRM_IF_ID, nl.Uint32Attr(xfrmi.Ifid)) - + if xfrmi.Ifid != 0 { + data.AddRtAttr(nl.IFLA_XFRM_IF_ID, nl.Uint32Attr(xfrmi.Ifid)) + } } func parseXfrmiData(link Link, data []syscall.NetlinkRouteAttr) { @@ -2876,8 +3721,7 @@ func parseXfrmiData(link Link, data []syscall.NetlinkRouteAttr) { } } -// LinkSetBondSlave add slave to bond link via ioctl interface. -func LinkSetBondSlave(link Link, master *Bond) error { +func ioctlBondSlave(cmd uintptr, link Link, master *Bond) error { fd, err := getSocketUDP() if err != nil { return err @@ -2885,10 +3729,38 @@ func LinkSetBondSlave(link Link, master *Bond) error { defer syscall.Close(fd) ifreq := newIocltSlaveReq(link.Attrs().Name, master.Attrs().Name) - - _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), unix.SIOCBONDENSLAVE, uintptr(unsafe.Pointer(ifreq))) + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), cmd, uintptr(unsafe.Pointer(ifreq))) if errno != 0 { - return fmt.Errorf("Failed to enslave %q to %q, errno=%v", link.Attrs().Name, master.Attrs().Name, errno) + return fmt.Errorf("errno=%v", errno) + } + return nil +} + +// LinkSetBondSlaveActive sets specified slave to ACTIVE in an `active-backup` bond link via ioctl interface. +// +// Multiple calls keeps the status unchanged(shown in the unit test). +func LinkSetBondSlaveActive(link Link, master *Bond) error { + err := ioctlBondSlave(unix.SIOCBONDCHANGEACTIVE, link, master) + if err != nil { + return fmt.Errorf("Failed to set slave %q active in %q, %v", link.Attrs().Name, master.Attrs().Name, err) + } + return nil +} + +// LinkSetBondSlave add slave to bond link via ioctl interface. +func LinkSetBondSlave(link Link, master *Bond) error { + err := ioctlBondSlave(unix.SIOCBONDENSLAVE, link, master) + if err != nil { + return fmt.Errorf("Failed to enslave %q to %q, %v", link.Attrs().Name, master.Attrs().Name, err) + } + return nil +} + +// LinkSetBondSlave removes specified slave from bond link via ioctl interface. +func LinkDelBondSlave(link Link, master *Bond) error { + err := ioctlBondSlave(unix.SIOCBONDRELEASE, link, master) + if err != nil { + return fmt.Errorf("Failed to del slave %q from %q, %v", link.Attrs().Name, master.Attrs().Name, err) } return nil } @@ -3010,9 +3882,86 @@ func parseIPoIBData(link Link, data []syscall.NetlinkRouteAttr) { } } +func parseCanData(link Link, data []syscall.NetlinkRouteAttr) { + can := link.(*Can) + for _, datum := range data { + + switch datum.Attr.Type { + case nl.IFLA_CAN_BITTIMING: + can.BitRate = native.Uint32(datum.Value) + can.SamplePoint = native.Uint32(datum.Value[4:]) + can.TimeQuanta = native.Uint32(datum.Value[8:]) + can.PropagationSegment = native.Uint32(datum.Value[12:]) + can.PhaseSegment1 = native.Uint32(datum.Value[16:]) + can.PhaseSegment2 = native.Uint32(datum.Value[20:]) + can.SyncJumpWidth = native.Uint32(datum.Value[24:]) + can.BitRatePreScaler = native.Uint32(datum.Value[28:]) + case nl.IFLA_CAN_BITTIMING_CONST: + can.Name = string(datum.Value[:16]) + can.TimeSegment1Min = native.Uint32(datum.Value[16:]) + can.TimeSegment1Max = native.Uint32(datum.Value[20:]) + can.TimeSegment2Min = native.Uint32(datum.Value[24:]) + can.TimeSegment2Max = native.Uint32(datum.Value[28:]) + can.SyncJumpWidthMax = native.Uint32(datum.Value[32:]) + can.BitRatePreScalerMin = native.Uint32(datum.Value[36:]) + can.BitRatePreScalerMax = native.Uint32(datum.Value[40:]) + can.BitRatePreScalerInc = native.Uint32(datum.Value[44:]) + case nl.IFLA_CAN_CLOCK: + can.ClockFrequency = native.Uint32(datum.Value) + case nl.IFLA_CAN_STATE: + can.State = native.Uint32(datum.Value) + case nl.IFLA_CAN_CTRLMODE: + can.Mask = native.Uint32(datum.Value) + can.Flags = native.Uint32(datum.Value[4:]) + case nl.IFLA_CAN_BERR_COUNTER: + can.TxError = native.Uint16(datum.Value) + can.RxError = native.Uint16(datum.Value[2:]) + case nl.IFLA_CAN_RESTART_MS: + can.RestartMs = native.Uint32(datum.Value) + case nl.IFLA_CAN_DATA_BITTIMING_CONST: + case nl.IFLA_CAN_RESTART: + case nl.IFLA_CAN_DATA_BITTIMING: + case nl.IFLA_CAN_TERMINATION: + case nl.IFLA_CAN_TERMINATION_CONST: + case nl.IFLA_CAN_BITRATE_CONST: + case nl.IFLA_CAN_DATA_BITRATE_CONST: + case nl.IFLA_CAN_BITRATE_MAX: + } + } +} + func addIPoIBAttrs(ipoib *IPoIB, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) data.AddRtAttr(nl.IFLA_IPOIB_PKEY, nl.Uint16Attr(uint16(ipoib.Pkey))) data.AddRtAttr(nl.IFLA_IPOIB_MODE, nl.Uint16Attr(uint16(ipoib.Mode))) data.AddRtAttr(nl.IFLA_IPOIB_UMCAST, nl.Uint16Attr(uint16(ipoib.Umcast))) } + +func addBareUDPAttrs(bareudp *BareUDP, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + data.AddRtAttr(nl.IFLA_BAREUDP_PORT, nl.Uint16Attr(nl.Swap16(bareudp.Port))) + data.AddRtAttr(nl.IFLA_BAREUDP_ETHERTYPE, nl.Uint16Attr(nl.Swap16(bareudp.EtherType))) + if bareudp.SrcPortMin != 0 { + data.AddRtAttr(nl.IFLA_BAREUDP_SRCPORT_MIN, nl.Uint16Attr(bareudp.SrcPortMin)) + } + if bareudp.MultiProto { + data.AddRtAttr(nl.IFLA_BAREUDP_MULTIPROTO_MODE, []byte{}) + } +} + +func parseBareUDPData(link Link, data []syscall.NetlinkRouteAttr) { + bareudp := link.(*BareUDP) + for _, attr := range data { + switch attr.Attr.Type { + case nl.IFLA_BAREUDP_PORT: + bareudp.Port = binary.BigEndian.Uint16(attr.Value) + case nl.IFLA_BAREUDP_ETHERTYPE: + bareudp.EtherType = binary.BigEndian.Uint16(attr.Value) + case nl.IFLA_BAREUDP_SRCPORT_MIN: + bareudp.SrcPortMin = native.Uint16(attr.Value) + case nl.IFLA_BAREUDP_MULTIPROTO_MODE: + bareudp.MultiProto = true + } + } +} diff --git a/vendor/github.com/vishvananda/netlink/neigh.go b/vendor/github.com/vishvananda/netlink/neigh.go index 379e5655f73..32d722e8858 100644 --- a/vendor/github.com/vishvananda/netlink/neigh.go +++ b/vendor/github.com/vishvananda/netlink/neigh.go @@ -12,6 +12,7 @@ type Neigh struct { State int Type int Flags int + FlagsExt int IP net.IP HardwareAddr net.HardwareAddr LLIPAddr net.IP //Used in the case of NHRP diff --git a/vendor/github.com/vishvananda/netlink/neigh_linux.go b/vendor/github.com/vishvananda/netlink/neigh_linux.go index cb3b55d3508..2d93044a6ea 100644 --- a/vendor/github.com/vishvananda/netlink/neigh_linux.go +++ b/vendor/github.com/vishvananda/netlink/neigh_linux.go @@ -24,7 +24,11 @@ const ( NDA_MASTER NDA_LINK_NETNSID NDA_SRC_VNI - NDA_MAX = NDA_SRC_VNI + NDA_PROTOCOL + NDA_NH_ID + NDA_FDB_EXT_ATTRS + NDA_FLAGS_EXT + NDA_MAX = NDA_FLAGS_EXT ) // Neighbor Cache Entry States. @@ -42,11 +46,19 @@ const ( // Neighbor Flags const ( - NTF_USE = 0x01 - NTF_SELF = 0x02 - NTF_MASTER = 0x04 - NTF_PROXY = 0x08 - NTF_ROUTER = 0x80 + NTF_USE = 0x01 + NTF_SELF = 0x02 + NTF_MASTER = 0x04 + NTF_PROXY = 0x08 + NTF_EXT_LEARNED = 0x10 + NTF_OFFLOADED = 0x20 + NTF_STICKY = 0x40 + NTF_ROUTER = 0x80 +) + +// Extended Neighbor Flags +const ( + NTF_EXT_MANAGED = 0x00000001 ) // Ndmsg is for adding, removing or receiving information about a neighbor table entry @@ -162,11 +174,16 @@ func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error { if neigh.LLIPAddr != nil { llIPData := nl.NewRtAttr(NDA_LLADDR, neigh.LLIPAddr.To4()) req.AddData(llIPData) - } else if neigh.Flags != NTF_PROXY || neigh.HardwareAddr != nil { + } else if neigh.HardwareAddr != nil { hwData := nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr)) req.AddData(hwData) } + if neigh.FlagsExt != 0 { + flagsExtData := nl.NewRtAttr(NDA_FLAGS_EXT, nl.Uint32Attr(uint32(neigh.FlagsExt))) + req.AddData(flagsExtData) + } + if neigh.Vlan != 0 { vlanData := nl.NewRtAttr(NDA_VLAN, nl.Uint16Attr(uint16(neigh.Vlan))) req.AddData(vlanData) @@ -243,6 +260,18 @@ func (h *Handle) NeighListExecute(msg Ndmsg) ([]Neigh, error) { // Ignore messages from other interfaces continue } + if msg.Family != 0 && ndm.Family != msg.Family { + continue + } + if msg.State != 0 && ndm.State != msg.State { + continue + } + if msg.Type != 0 && ndm.Type != msg.Type { + continue + } + if msg.Flags != 0 && ndm.Flags != msg.Flags { + continue + } neigh, err := NeighDeserialize(m) if err != nil { @@ -293,6 +322,8 @@ func NeighDeserialize(m []byte) (*Neigh, error) { } else { neigh.HardwareAddr = net.HardwareAddr(attr.Value) } + case NDA_FLAGS_EXT: + neigh.FlagsExt = int(native.Uint32(attr.Value[0:4])) case NDA_VLAN: neigh.Vlan = int(native.Uint16(attr.Value[0:2])) case NDA_VNI: @@ -308,13 +339,13 @@ func NeighDeserialize(m []byte) (*Neigh, error) { // NeighSubscribe takes a chan down which notifications will be sent // when neighbors are added or deleted. Close the 'done' chan to stop subscription. func NeighSubscribe(ch chan<- NeighUpdate, done <-chan struct{}) error { - return neighSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) + return neighSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // NeighSubscribeAt works like NeighSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func NeighSubscribeAt(ns netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}) error { - return neighSubscribeAt(ns, netns.None(), ch, done, nil, false) + return neighSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // NeighSubscribeOptions contains a set of options to use with @@ -323,6 +354,11 @@ type NeighSubscribeOptions struct { Namespace *netns.NsHandle ErrorCallback func(error) ListExisting bool + + // max size is based on value of /proc/sys/net/core/rmem_max + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // NeighSubscribeWithOptions work like NeighSubscribe but enable to @@ -333,16 +369,17 @@ func NeighSubscribeWithOptions(ch chan<- NeighUpdate, done <-chan struct{}, opti none := netns.None() options.Namespace = &none } - return neighSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) + return neighSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { +func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvbufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH) makeRequest := func(family int) error { - req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH, - unix.NLM_F_DUMP) - infmsg := nl.NewIfInfomsg(family) - req.AddData(infmsg) + req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH, unix.NLM_F_DUMP) + ndmsg := &Ndmsg{Family: uint8(family)} + req.AddData(ndmsg) if err := s.Send(req); err != nil { return err } @@ -351,6 +388,17 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done < if err != nil { return err } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + if rcvbuf != 0 { + err = s.SetReceiveBufferSize(rcvbuf, rcvbufForce) + if err != nil { + return err + } + } if done != nil { go func() { <-done @@ -396,13 +444,12 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done < continue } if m.Header.Type == unix.NLMSG_ERROR { - native := nl.NativeEndian() - error := int32(native.Uint32(m.Data[0:4])) - if error == 0 { + nError := int32(native.Uint32(m.Data[0:4])) + if nError == 0 { continue } if cberr != nil { - cberr(syscall.Errno(-error)) + cberr(syscall.Errno(-nError)) } return } diff --git a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go index 42d3acf9180..da12c42a560 100644 --- a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go @@ -16,7 +16,7 @@ func LinkSetMTU(link Link, mtu int) error { return ErrNotImplemented } -func LinkSetMaster(link Link, master *Bridge) error { +func LinkSetMaster(link Link, master Link) error { return ErrNotImplemented } @@ -52,6 +52,10 @@ func LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { return ErrNotImplemented } +func LinkSetVfVlanQosProto(link Link, vf, vlan, qos, proto int) error { + return ErrNotImplemented +} + func LinkSetVfTxRate(link Link, vf, rate int) error { return ErrNotImplemented } @@ -72,6 +76,10 @@ func LinkSetXdpFd(link Link, fd int) error { return ErrNotImplemented } +func LinkSetXdpFdWithFlags(link Link, fd, flags int) error { + return ErrNotImplemented +} + func LinkSetARPOff(link Link) error { return ErrNotImplemented } @@ -120,6 +128,22 @@ func LinkSetTxQLen(link Link, qlen int) error { return ErrNotImplemented } +func LinkSetGSOMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func LinkSetGROMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func LinkSetGSOIPv4MaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func LinkSetGROIPv4MaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + func LinkAdd(link Link) error { return ErrNotImplemented } @@ -176,14 +200,34 @@ func RouteAdd(route *Route) error { return ErrNotImplemented } +func RouteAppend(route *Route) error { + return ErrNotImplemented +} + +func RouteChange(route *Route) error { + return ErrNotImplemented +} + func RouteDel(route *Route) error { return ErrNotImplemented } +func RouteGet(destination net.IP) ([]Route, error) { + return nil, ErrNotImplemented +} + func RouteList(link Link, family int) ([]Route, error) { return nil, ErrNotImplemented } +func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { + return nil, ErrNotImplemented +} + +func RouteReplace(route *Route) error { + return ErrNotImplemented +} + func XfrmPolicyAdd(policy *XfrmPolicy) error { return ErrNotImplemented } @@ -196,6 +240,10 @@ func XfrmPolicyList(family int) ([]XfrmPolicy, error) { return nil, ErrNotImplemented } +func XfrmPolicyGet(policy *XfrmPolicy) (*XfrmPolicy, error) { + return nil, ErrNotImplemented +} + func XfrmStateAdd(policy *XfrmState) error { return ErrNotImplemented } @@ -235,3 +283,7 @@ func NeighDeserialize(m []byte) (*Neigh, error) { func SocketGet(local, remote net.Addr) (*Socket, error) { return nil, ErrNotImplemented } + +func SocketDestroy(local, remote net.Addr) (*Socket, error) { + return nil, ErrNotImplemented +} diff --git a/vendor/github.com/vishvananda/netlink/netns_linux.go b/vendor/github.com/vishvananda/netlink/netns_linux.go index 77cf6f46901..2eb29c7ce01 100644 --- a/vendor/github.com/vishvananda/netlink/netns_linux.go +++ b/vendor/github.com/vishvananda/netlink/netns_linux.go @@ -87,7 +87,7 @@ func (h *Handle) getNetNsId(attrType int, val uint32) (int, error) { rtgen := nl.NewRtGenMsg() req.AddData(rtgen) - b := make([]byte, 4, 4) + b := make([]byte, 4) native.PutUint32(b, val) attr := nl.NewRtAttr(attrType, b) req.AddData(attr) @@ -126,12 +126,12 @@ func (h *Handle) setNetNsId(attrType int, val uint32, newnsid uint32) error { rtgen := nl.NewRtGenMsg() req.AddData(rtgen) - b := make([]byte, 4, 4) + b := make([]byte, 4) native.PutUint32(b, val) attr := nl.NewRtAttr(attrType, b) req.AddData(attr) - b1 := make([]byte, 4, 4) + b1 := make([]byte, 4) native.PutUint32(b1, newnsid) attr1 := nl.NewRtAttr(NETNSA_NSID, b1) req.AddData(attr1) diff --git a/vendor/github.com/vishvananda/netlink/nl/addr_linux.go b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go index 50db3b4cdd8..6bea4ed02f0 100644 --- a/vendor/github.com/vishvananda/netlink/nl/addr_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go @@ -54,24 +54,18 @@ func (msg *IfAddrmsg) Len() int { // __u32 tstamp; /* updated timestamp, hundredths of seconds */ // }; -const IFA_CACHEINFO = 6 -const SizeofIfaCacheInfo = 0x10 - type IfaCacheInfo struct { - IfaPrefered uint32 - IfaValid uint32 - Cstamp uint32 - Tstamp uint32 + unix.IfaCacheinfo } func (msg *IfaCacheInfo) Len() int { - return SizeofIfaCacheInfo + return unix.SizeofIfaCacheinfo } func DeserializeIfaCacheInfo(b []byte) *IfaCacheInfo { - return (*IfaCacheInfo)(unsafe.Pointer(&b[0:SizeofIfaCacheInfo][0])) + return (*IfaCacheInfo)(unsafe.Pointer(&b[0:unix.SizeofIfaCacheinfo][0])) } func (msg *IfaCacheInfo) Serialize() []byte { - return (*(*[SizeofIfaCacheInfo]byte)(unsafe.Pointer(msg)))[:] + return (*(*[unix.SizeofIfaCacheinfo]byte)(unsafe.Pointer(msg)))[:] } diff --git a/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go index 79d2b6b8909..6989d1edc0b 100644 --- a/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go @@ -15,6 +15,38 @@ var L4ProtoMap = map[uint8]string{ 17: "udp", } +// From https://git.netfilter.org/libnetfilter_conntrack/tree/include/libnetfilter_conntrack/libnetfilter_conntrack_tcp.h +// enum tcp_state { +// TCP_CONNTRACK_NONE, +// TCP_CONNTRACK_SYN_SENT, +// TCP_CONNTRACK_SYN_RECV, +// TCP_CONNTRACK_ESTABLISHED, +// TCP_CONNTRACK_FIN_WAIT, +// TCP_CONNTRACK_CLOSE_WAIT, +// TCP_CONNTRACK_LAST_ACK, +// TCP_CONNTRACK_TIME_WAIT, +// TCP_CONNTRACK_CLOSE, +// TCP_CONNTRACK_LISTEN, /* obsolete */ +// #define TCP_CONNTRACK_SYN_SENT2 TCP_CONNTRACK_LISTEN +// TCP_CONNTRACK_MAX, +// TCP_CONNTRACK_IGNORE +// }; +const ( + TCP_CONNTRACK_NONE = 0 + TCP_CONNTRACK_SYN_SENT = 1 + TCP_CONNTRACK_SYN_RECV = 2 + TCP_CONNTRACK_ESTABLISHED = 3 + TCP_CONNTRACK_FIN_WAIT = 4 + TCP_CONNTRACK_CLOSE_WAIT = 5 + TCP_CONNTRACK_LAST_ACK = 6 + TCP_CONNTRACK_TIME_WAIT = 7 + TCP_CONNTRACK_CLOSE = 8 + TCP_CONNTRACK_LISTEN = 9 + TCP_CONNTRACK_SYN_SENT2 = 9 + TCP_CONNTRACK_MAX = 10 + TCP_CONNTRACK_IGNORE = 11 +) + // All the following constants are coming from: // https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -31,6 +63,7 @@ var L4ProtoMap = map[uint8]string{ // IPCTNL_MSG_MAX // }; const ( + IPCTNL_MSG_CT_NEW = 0 IPCTNL_MSG_CT_GET = 1 IPCTNL_MSG_CT_DELETE = 2 ) @@ -40,9 +73,11 @@ const ( NFNETLINK_V0 = 0 ) -// #define NLA_F_NESTED (1 << 15) const ( - NLA_F_NESTED = (1 << 15) + NLA_F_NESTED uint16 = (1 << 15) // #define NLA_F_NESTED (1 << 15) + NLA_F_NET_BYTEORDER uint16 = (1 << 14) // #define NLA_F_NESTED (1 << 14) + NLA_TYPE_MASK = ^(NLA_F_NESTED | NLA_F_NET_BYTEORDER) + NLA_ALIGNTO uint16 = 4 // #define NLA_ALIGNTO 4 ) // enum ctattr_type { @@ -86,7 +121,10 @@ const ( CTA_COUNTERS_REPLY = 10 CTA_USE = 11 CTA_ID = 12 + CTA_ZONE = 18 CTA_TIMESTAMP = 20 + CTA_LABELS = 22 + CTA_LABELS_MASK = 23 ) // enum ctattr_tuple { @@ -147,7 +185,10 @@ const ( // }; // #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) const ( + CTA_PROTOINFO_UNSPEC = 0 CTA_PROTOINFO_TCP = 1 + CTA_PROTOINFO_DCCP = 2 + CTA_PROTOINFO_SCTP = 3 ) // enum ctattr_protoinfo_tcp { diff --git a/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go b/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go index db66faaad30..956367b2957 100644 --- a/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go @@ -9,17 +9,56 @@ const ( ) const ( - DEVLINK_CMD_GET = 1 - DEVLINK_CMD_ESWITCH_GET = 29 - DEVLINK_CMD_ESWITCH_SET = 30 + DEVLINK_CMD_GET = 1 + DEVLINK_CMD_PORT_GET = 5 + DEVLINK_CMD_PORT_SET = 6 + DEVLINK_CMD_PORT_NEW = 7 + DEVLINK_CMD_PORT_DEL = 8 + DEVLINK_CMD_ESWITCH_GET = 29 + DEVLINK_CMD_ESWITCH_SET = 30 + DEVLINK_CMD_RESOURCE_DUMP = 36 + DEVLINK_CMD_PARAM_GET = 38 + DEVLINK_CMD_PARAM_SET = 39 + DEVLINK_CMD_INFO_GET = 51 ) const ( - DEVLINK_ATTR_BUS_NAME = 1 - DEVLINK_ATTR_DEV_NAME = 2 - DEVLINK_ATTR_ESWITCH_MODE = 25 - DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26 - DEVLINK_ATTR_ESWITCH_ENCAP_MODE = 62 + DEVLINK_ATTR_BUS_NAME = 1 + DEVLINK_ATTR_DEV_NAME = 2 + DEVLINK_ATTR_PORT_INDEX = 3 + DEVLINK_ATTR_PORT_TYPE = 4 + DEVLINK_ATTR_PORT_NETDEV_IFINDEX = 6 + DEVLINK_ATTR_PORT_NETDEV_NAME = 7 + DEVLINK_ATTR_PORT_IBDEV_NAME = 8 + DEVLINK_ATTR_ESWITCH_MODE = 25 + DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26 + DEVLINK_ATTR_ESWITCH_ENCAP_MODE = 62 + DEVLINK_ATTR_RESOURCE_LIST = 63 /* nested */ + DEVLINK_ATTR_RESOURCE = 64 /* nested */ + DEVLINK_ATTR_RESOURCE_NAME = 65 /* string */ + DEVLINK_ATTR_RESOURCE_ID = 66 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE = 67 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_NEW = 68 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_VALID = 69 /* u8 */ + DEVLINK_ATTR_RESOURCE_SIZE_MIN = 70 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_MAX = 71 /* u64 */ + DEVLINK_ATTR_RESOURCE_SIZE_GRAN = 72 /* u64 */ + DEVLINK_ATTR_RESOURCE_UNIT = 73 /* u8 */ + DEVLINK_ATTR_RESOURCE_OCC = 74 /* u64 */ + DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID = 75 /* u64 */ + DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS = 76 /* u64 */ + DEVLINK_ATTR_PORT_FLAVOUR = 77 + DEVLINK_ATTR_INFO_DRIVER_NAME = 98 + DEVLINK_ATTR_INFO_SERIAL_NUMBER = 99 + DEVLINK_ATTR_INFO_VERSION_FIXED = 100 + DEVLINK_ATTR_INFO_VERSION_RUNNING = 101 + DEVLINK_ATTR_INFO_VERSION_STORED = 102 + DEVLINK_ATTR_INFO_VERSION_NAME = 103 + DEVLINK_ATTR_INFO_VERSION_VALUE = 104 + DEVLINK_ATTR_PORT_PCI_PF_NUMBER = 127 + DEVLINK_ATTR_PORT_FUNCTION = 145 + DEVLINK_ATTR_PORT_CONTROLLER_NUMBER = 150 + DEVLINK_ATTR_PORT_PCI_SF_NUMBER = 164 ) const ( @@ -38,3 +77,66 @@ const ( DEVLINK_ESWITCH_ENCAP_MODE_NONE = 0 DEVLINK_ESWITCH_ENCAP_MODE_BASIC = 1 ) + +const ( + DEVLINK_PORT_FLAVOUR_PHYSICAL = 0 + DEVLINK_PORT_FLAVOUR_CPU = 1 + DEVLINK_PORT_FLAVOUR_DSA = 2 + DEVLINK_PORT_FLAVOUR_PCI_PF = 3 + DEVLINK_PORT_FLAVOUR_PCI_VF = 4 + DEVLINK_PORT_FLAVOUR_VIRTUAL = 5 + DEVLINK_PORT_FLAVOUR_UNUSED = 6 + DEVLINK_PORT_FLAVOUR_PCI_SF = 7 +) + +const ( + DEVLINK_PORT_TYPE_NOTSET = 0 + DEVLINK_PORT_TYPE_AUTO = 1 + DEVLINK_PORT_TYPE_ETH = 2 + DEVLINK_PORT_TYPE_IB = 3 +) + +const ( + DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR = 1 + DEVLINK_PORT_FN_ATTR_STATE = 2 + DEVLINK_PORT_FN_ATTR_OPSTATE = 3 +) + +const ( + DEVLINK_PORT_FN_STATE_INACTIVE = 0 + DEVLINK_PORT_FN_STATE_ACTIVE = 1 +) + +const ( + DEVLINK_PORT_FN_OPSTATE_DETACHED = 0 + DEVLINK_PORT_FN_OPSTATE_ATTACHED = 1 +) + +const ( + DEVLINK_RESOURCE_UNIT_ENTRY uint8 = 0 +) + +const ( + DEVLINK_ATTR_PARAM = iota + 80 /* nested */ + DEVLINK_ATTR_PARAM_NAME /* string */ + DEVLINK_ATTR_PARAM_GENERIC /* flag */ + DEVLINK_ATTR_PARAM_TYPE /* u8 */ + DEVLINK_ATTR_PARAM_VALUES_LIST /* nested */ + DEVLINK_ATTR_PARAM_VALUE /* nested */ + DEVLINK_ATTR_PARAM_VALUE_DATA /* dynamic */ + DEVLINK_ATTR_PARAM_VALUE_CMODE /* u8 */ +) + +const ( + DEVLINK_PARAM_TYPE_U8 = 1 + DEVLINK_PARAM_TYPE_U16 = 2 + DEVLINK_PARAM_TYPE_U32 = 3 + DEVLINK_PARAM_TYPE_STRING = 5 + DEVLINK_PARAM_TYPE_BOOL = 6 +) + +const ( + DEVLINK_PARAM_CMODE_RUNTIME = iota + DEVLINK_PARAM_CMODE_DRIVERINIT + DEVLINK_PARAM_CMODE_PERMANENT +) diff --git a/vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go b/vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go new file mode 100644 index 00000000000..d5dd69e0c40 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/ip6tnl_linux.go @@ -0,0 +1,21 @@ +package nl + +// id's of route attribute from https://elixir.bootlin.com/linux/v5.17.3/source/include/uapi/linux/lwtunnel.h#L38 +// the value's size are specified in https://elixir.bootlin.com/linux/v5.17.3/source/net/ipv4/ip_tunnel_core.c#L928 + +const ( + LWTUNNEL_IP6_UNSPEC = iota + LWTUNNEL_IP6_ID + LWTUNNEL_IP6_DST + LWTUNNEL_IP6_SRC + LWTUNNEL_IP6_HOPLIMIT + LWTUNNEL_IP6_TC + LWTUNNEL_IP6_FLAGS + LWTUNNEL_IP6_PAD // not implemented + LWTUNNEL_IP6_OPTS // not implemented + __LWTUNNEL_IP6_MAX +) + + + + diff --git a/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go b/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go new file mode 100644 index 00000000000..89dd009df1f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go @@ -0,0 +1,227 @@ +package nl + +import ( + "strconv" + + "golang.org/x/sys/unix" +) + +const ( + /* The protocol version */ + IPSET_PROTOCOL = 6 + + /* The max length of strings including NUL: set and type identifiers */ + IPSET_MAXNAMELEN = 32 + + /* The maximum permissible comment length we will accept over netlink */ + IPSET_MAX_COMMENT_SIZE = 255 +) + +const ( + _ = iota + IPSET_CMD_PROTOCOL /* 1: Return protocol version */ + IPSET_CMD_CREATE /* 2: Create a new (empty) set */ + IPSET_CMD_DESTROY /* 3: Destroy a (empty) set */ + IPSET_CMD_FLUSH /* 4: Remove all elements from a set */ + IPSET_CMD_RENAME /* 5: Rename a set */ + IPSET_CMD_SWAP /* 6: Swap two sets */ + IPSET_CMD_LIST /* 7: List sets */ + IPSET_CMD_SAVE /* 8: Save sets */ + IPSET_CMD_ADD /* 9: Add an element to a set */ + IPSET_CMD_DEL /* 10: Delete an element from a set */ + IPSET_CMD_TEST /* 11: Test an element in a set */ + IPSET_CMD_HEADER /* 12: Get set header data only */ + IPSET_CMD_TYPE /* 13: Get set type */ +) + +/* Attributes at command level */ +const ( + _ = iota + IPSET_ATTR_PROTOCOL /* 1: Protocol version */ + IPSET_ATTR_SETNAME /* 2: Name of the set */ + IPSET_ATTR_TYPENAME /* 3: Typename */ + IPSET_ATTR_REVISION /* 4: Settype revision */ + IPSET_ATTR_FAMILY /* 5: Settype family */ + IPSET_ATTR_FLAGS /* 6: Flags at command level */ + IPSET_ATTR_DATA /* 7: Nested attributes */ + IPSET_ATTR_ADT /* 8: Multiple data containers */ + IPSET_ATTR_LINENO /* 9: Restore lineno */ + IPSET_ATTR_PROTOCOL_MIN /* 10: Minimal supported version number */ + + IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME /* Setname at rename/swap */ + IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN /* type rev min */ +) + +/* CADT specific attributes */ +const ( + IPSET_ATTR_IP = 1 + IPSET_ATTR_IP_FROM = 1 + IPSET_ATTR_IP_TO = 2 + IPSET_ATTR_CIDR = 3 + IPSET_ATTR_PORT = 4 + IPSET_ATTR_PORT_FROM = 4 + IPSET_ATTR_PORT_TO = 5 + IPSET_ATTR_TIMEOUT = 6 + IPSET_ATTR_PROTO = 7 + IPSET_ATTR_CADT_FLAGS = 8 + IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO /* 9 */ + IPSET_ATTR_MARK = 10 + IPSET_ATTR_MARKMASK = 11 + + /* Reserve empty slots */ + IPSET_ATTR_CADT_MAX = 16 + + /* Create-only specific attributes */ + IPSET_ATTR_GC = 3 + iota + IPSET_ATTR_HASHSIZE + IPSET_ATTR_MAXELEM + IPSET_ATTR_NETMASK + IPSET_ATTR_PROBES + IPSET_ATTR_RESIZE + IPSET_ATTR_SIZE + + /* Kernel-only */ + IPSET_ATTR_ELEMENTS + IPSET_ATTR_REFERENCES + IPSET_ATTR_MEMSIZE + + SET_ATTR_CREATE_MAX +) + +const ( + IPSET_ATTR_IPADDR_IPV4 = 1 + IPSET_ATTR_IPADDR_IPV6 = 2 +) + +/* ADT specific attributes */ +const ( + IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + iota + 1 + IPSET_ATTR_NAME + IPSET_ATTR_NAMEREF + IPSET_ATTR_IP2 + IPSET_ATTR_CIDR2 + IPSET_ATTR_IP2_TO + IPSET_ATTR_IFACE + IPSET_ATTR_BYTES + IPSET_ATTR_PACKETS + IPSET_ATTR_COMMENT + IPSET_ATTR_SKBMARK + IPSET_ATTR_SKBPRIO + IPSET_ATTR_SKBQUEUE +) + +/* Flags at CADT attribute level, upper half of cmdattrs */ +const ( + IPSET_FLAG_BIT_BEFORE = 0 + IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE) + IPSET_FLAG_BIT_PHYSDEV = 1 + IPSET_FLAG_PHYSDEV = (1 << IPSET_FLAG_BIT_PHYSDEV) + IPSET_FLAG_BIT_NOMATCH = 2 + IPSET_FLAG_NOMATCH = (1 << IPSET_FLAG_BIT_NOMATCH) + IPSET_FLAG_BIT_WITH_COUNTERS = 3 + IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS) + IPSET_FLAG_BIT_WITH_COMMENT = 4 + IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT) + IPSET_FLAG_BIT_WITH_FORCEADD = 5 + IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD) + IPSET_FLAG_BIT_WITH_SKBINFO = 6 + IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO) + IPSET_FLAG_CADT_MAX = 15 +) + +const ( + IPSET_ERR_PRIVATE = 4096 + iota + IPSET_ERR_PROTOCOL + IPSET_ERR_FIND_TYPE + IPSET_ERR_MAX_SETS + IPSET_ERR_BUSY + IPSET_ERR_EXIST_SETNAME2 + IPSET_ERR_TYPE_MISMATCH + IPSET_ERR_EXIST + IPSET_ERR_INVALID_CIDR + IPSET_ERR_INVALID_NETMASK + IPSET_ERR_INVALID_FAMILY + IPSET_ERR_TIMEOUT + IPSET_ERR_REFERENCED + IPSET_ERR_IPADDR_IPV4 + IPSET_ERR_IPADDR_IPV6 + IPSET_ERR_COUNTER + IPSET_ERR_COMMENT + IPSET_ERR_INVALID_MARKMASK + IPSET_ERR_SKBINFO + + /* Type specific error codes */ + IPSET_ERR_TYPE_SPECIFIC = 4352 +) + +type IPSetError uintptr + +func (e IPSetError) Error() string { + switch int(e) { + case IPSET_ERR_PRIVATE: + return "private" + case IPSET_ERR_PROTOCOL: + return "invalid protocol" + case IPSET_ERR_FIND_TYPE: + return "invalid type" + case IPSET_ERR_MAX_SETS: + return "max sets reached" + case IPSET_ERR_BUSY: + return "busy" + case IPSET_ERR_EXIST_SETNAME2: + return "exist_setname2" + case IPSET_ERR_TYPE_MISMATCH: + return "type mismatch" + case IPSET_ERR_EXIST: + return "exist" + case IPSET_ERR_INVALID_CIDR: + return "invalid cidr" + case IPSET_ERR_INVALID_NETMASK: + return "invalid netmask" + case IPSET_ERR_INVALID_FAMILY: + return "invalid family" + case IPSET_ERR_TIMEOUT: + return "timeout" + case IPSET_ERR_REFERENCED: + return "referenced" + case IPSET_ERR_IPADDR_IPV4: + return "invalid ipv4 address" + case IPSET_ERR_IPADDR_IPV6: + return "invalid ipv6 address" + case IPSET_ERR_COUNTER: + return "invalid counter" + case IPSET_ERR_COMMENT: + return "invalid comment" + case IPSET_ERR_INVALID_MARKMASK: + return "invalid markmask" + case IPSET_ERR_SKBINFO: + return "skbinfo" + default: + return "errno " + strconv.Itoa(int(e)) + } +} + +func GetIpsetFlags(cmd int) int { + switch cmd { + case IPSET_CMD_CREATE: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK | unix.NLM_F_CREATE + case IPSET_CMD_DESTROY, + IPSET_CMD_FLUSH, + IPSET_CMD_RENAME, + IPSET_CMD_SWAP, + IPSET_CMD_TEST: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK + case IPSET_CMD_LIST, + IPSET_CMD_SAVE: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK | unix.NLM_F_ROOT | unix.NLM_F_MATCH | unix.NLM_F_DUMP + case IPSET_CMD_ADD, + IPSET_CMD_DEL: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK + case IPSET_CMD_HEADER, + IPSET_CMD_TYPE, + IPSET_CMD_PROTOCOL: + return unix.NLM_F_REQUEST + default: + return 0 + } +} diff --git a/vendor/github.com/vishvananda/netlink/nl/link_linux.go b/vendor/github.com/vishvananda/netlink/nl/link_linux.go index afb16a9c18d..0b5be470cb0 100644 --- a/vendor/github.com/vishvananda/netlink/nl/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/link_linux.go @@ -1,6 +1,9 @@ package nl import ( + "bytes" + "encoding/binary" + "fmt" "unsafe" ) @@ -28,6 +31,16 @@ const ( IFLA_VLAN_MAX = IFLA_VLAN_PROTOCOL ) +const ( + IFLA_NETKIT_UNSPEC = iota + IFLA_NETKIT_PEER_INFO + IFLA_NETKIT_PRIMARY + IFLA_NETKIT_POLICY + IFLA_NETKIT_PEER_POLICY + IFLA_NETKIT_MODE + IFLA_NETKIT_MAX = IFLA_NETKIT_MODE +) + const ( VETH_INFO_UNSPEC = iota VETH_INFO_PEER @@ -83,7 +96,37 @@ const ( IFLA_BRPORT_PROXYARP IFLA_BRPORT_LEARNING_SYNC IFLA_BRPORT_PROXYARP_WIFI - IFLA_BRPORT_MAX = IFLA_BRPORT_PROXYARP_WIFI + IFLA_BRPORT_ROOT_ID + IFLA_BRPORT_BRIDGE_ID + IFLA_BRPORT_DESIGNATED_PORT + IFLA_BRPORT_DESIGNATED_COST + IFLA_BRPORT_ID + IFLA_BRPORT_NO + IFLA_BRPORT_TOPOLOGY_CHANGE_ACK + IFLA_BRPORT_CONFIG_PENDING + IFLA_BRPORT_MESSAGE_AGE_TIMER + IFLA_BRPORT_FORWARD_DELAY_TIMER + IFLA_BRPORT_HOLD_TIMER + IFLA_BRPORT_FLUSH + IFLA_BRPORT_MULTICAST_ROUTER + IFLA_BRPORT_PAD + IFLA_BRPORT_MCAST_FLOOD + IFLA_BRPORT_MCAST_TO_UCAST + IFLA_BRPORT_VLAN_TUNNEL + IFLA_BRPORT_BCAST_FLOOD + IFLA_BRPORT_GROUP_FWD_MASK + IFLA_BRPORT_NEIGH_SUPPRESS + IFLA_BRPORT_ISOLATED + IFLA_BRPORT_BACKUP_PORT + IFLA_BRPORT_MRP_RING_OPEN + IFLA_BRPORT_MRP_IN_OPEN + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT + IFLA_BRPORT_LOCKED + IFLA_BRPORT_MAB + IFLA_BRPORT_MCAST_N_GROUPS + IFLA_BRPORT_MCAST_MAX_GROUPS + IFLA_BRPORT_MAX = IFLA_BRPORT_MCAST_MAX_GROUPS ) const ( @@ -101,7 +144,9 @@ const ( IFLA_MACVLAN_MACADDR IFLA_MACVLAN_MACADDR_DATA IFLA_MACVLAN_MACADDR_COUNT - IFLA_MACVLAN_MAX = IFLA_MACVLAN_FLAGS + IFLA_MACVLAN_BC_QUEUE_LEN + IFLA_MACVLAN_BC_QUEUE_LEN_USED + IFLA_MACVLAN_MAX = IFLA_MACVLAN_BC_QUEUE_LEN_USED ) const ( @@ -171,6 +216,25 @@ const ( IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE ) +const ( + IFLA_GENEVE_UNSPEC = iota + IFLA_GENEVE_ID // vni + IFLA_GENEVE_REMOTE + IFLA_GENEVE_TTL + IFLA_GENEVE_TOS + IFLA_GENEVE_PORT // destination port + IFLA_GENEVE_COLLECT_METADATA + IFLA_GENEVE_REMOTE6 + IFLA_GENEVE_UDP_CSUM + IFLA_GENEVE_UDP_ZERO_CSUM6_TX + IFLA_GENEVE_UDP_ZERO_CSUM6_RX + IFLA_GENEVE_LABEL + IFLA_GENEVE_TTL_INHERIT + IFLA_GENEVE_DF + IFLA_GENEVE_INNER_PROTO_INHERIT + IFLA_GENEVE_MAX = IFLA_GENEVE_INNER_PROTO_INHERIT +) + const ( IFLA_GRE_UNSPEC = iota IFLA_GRE_LINK @@ -226,7 +290,15 @@ const ( IFLA_VF_TRUST /* Trust state of VF */ IFLA_VF_IB_NODE_GUID /* VF Infiniband node GUID */ IFLA_VF_IB_PORT_GUID /* VF Infiniband port GUID */ - IFLA_VF_MAX = IFLA_VF_IB_PORT_GUID + IFLA_VF_VLAN_LIST /* nested list of vlans, option for QinQ */ + + IFLA_VF_MAX = IFLA_VF_IB_PORT_GUID +) + +const ( + IFLA_VF_VLAN_INFO_UNSPEC = iota + IFLA_VF_VLAN_INFO /* VLAN ID, QoS and VLAN protocol */ + __IFLA_VF_VLAN_INFO_MAX ) const ( @@ -243,12 +315,15 @@ const ( IFLA_VF_STATS_TX_BYTES IFLA_VF_STATS_BROADCAST IFLA_VF_STATS_MULTICAST - IFLA_VF_STATS_MAX = IFLA_VF_STATS_MULTICAST + IFLA_VF_STATS_RX_DROPPED + IFLA_VF_STATS_TX_DROPPED + IFLA_VF_STATS_MAX = IFLA_VF_STATS_TX_DROPPED ) const ( SizeofVfMac = 0x24 SizeofVfVlan = 0x0c + SizeofVfVlanInfo = 0x10 SizeofVfTxRate = 0x08 SizeofVfRate = 0x0c SizeofVfSpoofchk = 0x08 @@ -304,6 +379,49 @@ func (msg *VfVlan) Serialize() []byte { return (*(*[SizeofVfVlan]byte)(unsafe.Pointer(msg)))[:] } +func DeserializeVfVlanList(b []byte) ([]*VfVlanInfo, error) { + var vfVlanInfoList []*VfVlanInfo + attrs, err := ParseRouteAttr(b) + if err != nil { + return nil, err + } + + for _, element := range attrs { + if element.Attr.Type == IFLA_VF_VLAN_INFO { + vfVlanInfoList = append(vfVlanInfoList, DeserializeVfVlanInfo(element.Value)) + } + } + + if len(vfVlanInfoList) == 0 { + return nil, fmt.Errorf("VF vlan list is defined but no vf vlan info elements were found") + } + + return vfVlanInfoList, nil +} + +// struct ifla_vf_vlan_info { +// __u32 vf; +// __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ +// __u32 qos; +// __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ +// }; + +type VfVlanInfo struct { + VfVlan + VlanProto uint16 +} + +func DeserializeVfVlanInfo(b []byte) *VfVlanInfo { + return &VfVlanInfo{ + *(*VfVlan)(unsafe.Pointer(&b[0:SizeofVfVlan][0])), + binary.BigEndian.Uint16(b[SizeofVfVlan:SizeofVfVlanInfo]), + } +} + +func (msg *VfVlanInfo) Serialize() []byte { + return (*(*[SizeofVfVlanInfo]byte)(unsafe.Pointer(msg)))[:] +} + // struct ifla_vf_tx_rate { // __u32 vf; // __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ @@ -326,6 +444,59 @@ func (msg *VfTxRate) Serialize() []byte { return (*(*[SizeofVfTxRate]byte)(unsafe.Pointer(msg)))[:] } +//struct ifla_vf_stats { +// __u64 rx_packets; +// __u64 tx_packets; +// __u64 rx_bytes; +// __u64 tx_bytes; +// __u64 broadcast; +// __u64 multicast; +//}; + +type VfStats struct { + RxPackets uint64 + TxPackets uint64 + RxBytes uint64 + TxBytes uint64 + Multicast uint64 + Broadcast uint64 + RxDropped uint64 + TxDropped uint64 +} + +func DeserializeVfStats(b []byte) VfStats { + var vfstat VfStats + stats, err := ParseRouteAttr(b) + if err != nil { + return vfstat + } + var valueVar uint64 + for _, stat := range stats { + if err := binary.Read(bytes.NewBuffer(stat.Value), NativeEndian(), &valueVar); err != nil { + break + } + switch stat.Attr.Type { + case IFLA_VF_STATS_RX_PACKETS: + vfstat.RxPackets = valueVar + case IFLA_VF_STATS_TX_PACKETS: + vfstat.TxPackets = valueVar + case IFLA_VF_STATS_RX_BYTES: + vfstat.RxBytes = valueVar + case IFLA_VF_STATS_TX_BYTES: + vfstat.TxBytes = valueVar + case IFLA_VF_STATS_MULTICAST: + vfstat.Multicast = valueVar + case IFLA_VF_STATS_BROADCAST: + vfstat.Broadcast = valueVar + case IFLA_VF_STATS_RX_DROPPED: + vfstat.RxDropped = valueVar + case IFLA_VF_STATS_TX_DROPPED: + vfstat.TxDropped = valueVar + } + } + return vfstat +} + // struct ifla_vf_rate { // __u32 vf; // __u32 min_tx_rate; /* Min Bandwidth in Mbps */ @@ -478,6 +649,14 @@ const ( IFLA_XDP_MAX = IFLA_XDP_PROG_ID ) +// XDP program attach mode (used as dump value for IFLA_XDP_ATTACHED) +const ( + XDP_ATTACHED_NONE = iota + XDP_ATTACHED_DRV + XDP_ATTACHED_SKB + XDP_ATTACHED_HW +) + const ( IFLA_IPTUN_UNSPEC = iota IFLA_IPTUN_LINK @@ -608,3 +787,32 @@ const ( IFLA_IPOIB_UMCAST IFLA_IPOIB_MAX = IFLA_IPOIB_UMCAST ) + +const ( + IFLA_CAN_UNSPEC = iota + IFLA_CAN_BITTIMING + IFLA_CAN_BITTIMING_CONST + IFLA_CAN_CLOCK + IFLA_CAN_STATE + IFLA_CAN_CTRLMODE + IFLA_CAN_RESTART_MS + IFLA_CAN_RESTART + IFLA_CAN_BERR_COUNTER + IFLA_CAN_DATA_BITTIMING + IFLA_CAN_DATA_BITTIMING_CONST + IFLA_CAN_TERMINATION + IFLA_CAN_TERMINATION_CONST + IFLA_CAN_BITRATE_CONST + IFLA_CAN_DATA_BITRATE_CONST + IFLA_CAN_BITRATE_MAX + IFLA_CAN_MAX = IFLA_CAN_BITRATE_MAX +) + +const ( + IFLA_BAREUDP_UNSPEC = iota + IFLA_BAREUDP_PORT + IFLA_BAREUDP_ETHERTYPE + IFLA_BAREUDP_SRCPORT_MIN + IFLA_BAREUDP_MULTIPROTO_MODE + IFLA_BAREUDP_MAX = IFLA_BAREUDP_MULTIPROTO_MODE +) diff --git a/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go b/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go new file mode 100644 index 00000000000..bafd593c4fe --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go @@ -0,0 +1,29 @@ +package nl + +const ( + LWT_BPF_PROG_UNSPEC = iota + LWT_BPF_PROG_FD + LWT_BPF_PROG_NAME + __LWT_BPF_PROG_MAX +) + +const ( + LWT_BPF_PROG_MAX = __LWT_BPF_PROG_MAX - 1 +) + +const ( + LWT_BPF_UNSPEC = iota + LWT_BPF_IN + LWT_BPF_OUT + LWT_BPF_XMIT + LWT_BPF_XMIT_HEADROOM + __LWT_BPF_MAX +) + +const ( + LWT_BPF_MAX = __LWT_BPF_MAX - 1 +) + +const ( + LWT_BPF_MAX_HEADROOM = 256 +) diff --git a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go index aaf56c6715f..6cecc4517a5 100644 --- a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go @@ -6,6 +6,7 @@ import ( "encoding/binary" "fmt" "net" + "os" "runtime" "sync" "sync/atomic" @@ -27,7 +28,8 @@ const ( // tc rules or filters, or other more memory requiring data. RECEIVE_BUFFER_SIZE = 65536 // Kernel netlink pid - PidKernel uint32 = 0 + PidKernel uint32 = 0 + SizeofCnMsgOp = 0x18 ) // SupportedNlFamilies contains the list of netlink families this netlink package supports @@ -35,6 +37,12 @@ var SupportedNlFamilies = []int{unix.NETLINK_ROUTE, unix.NETLINK_XFRM, unix.NETL var nextSeqNr uint32 +// Default netlink socket timeout, 60s +var SocketTimeoutTv = unix.Timeval{Sec: 60, Usec: 0} + +// ErrorMessageReporting is the default error message reporting configuration for the new netlink sockets +var EnableErrorMessageReporting bool = false + // GetIPFamily returns the family type of a net.IP. func GetIPFamily(ip net.IP) int { if len(ip) <= net.IPv4len { @@ -77,11 +85,69 @@ func Swap32(i uint32) uint32 { return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24 } +const ( + NLMSGERR_ATTR_UNUSED = 0 + NLMSGERR_ATTR_MSG = 1 + NLMSGERR_ATTR_OFFS = 2 + NLMSGERR_ATTR_COOKIE = 3 + NLMSGERR_ATTR_POLICY = 4 +) + type NetlinkRequestData interface { Len() int Serialize() []byte } +const ( + PROC_CN_MCAST_LISTEN = 1 + PROC_CN_MCAST_IGNORE +) + +type CbID struct { + Idx uint32 + Val uint32 +} + +type CnMsg struct { + ID CbID + Seq uint32 + Ack uint32 + Length uint16 + Flags uint16 +} + +type CnMsgOp struct { + CnMsg + // here we differ from the C header + Op uint32 +} + +func NewCnMsg(idx, val, op uint32) *CnMsgOp { + var cm CnMsgOp + + cm.ID.Idx = idx + cm.ID.Val = val + + cm.Ack = 0 + cm.Seq = 1 + cm.Length = uint16(binary.Size(op)) + cm.Op = op + + return &cm +} + +func (msg *CnMsgOp) Serialize() []byte { + return (*(*[SizeofCnMsgOp]byte)(unsafe.Pointer(msg)))[:] +} + +func DeserializeCnMsgOp(b []byte) *CnMsgOp { + return (*CnMsgOp)(unsafe.Pointer(&b[0:SizeofCnMsgOp][0])) +} + +func (msg *CnMsgOp) Len() int { + return SizeofCnMsgOp +} + // IfInfomsg is related to links, but it is used for list requests as well type IfInfomsg struct { unix.IfInfomsg @@ -249,6 +315,12 @@ func (msg *IfInfomsg) EncapType() string { return fmt.Sprintf("unknown%d", msg.Type) } +// Round the length of a netlink message up to align it properly. +// Taken from syscall/netlink_linux.go by The Go Authors under BSD-style license. +func nlmAlignOf(msglen int) int { + return (msglen + syscall.NLMSG_ALIGNTO - 1) & ^(syscall.NLMSG_ALIGNTO - 1) +} + func rtaAlignOf(attrlen int) int { return (attrlen + unix.RTA_ALIGNTO - 1) & ^(unix.RTA_ALIGNTO - 1) } @@ -259,6 +331,42 @@ func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg { return msg } +type Uint32Bitfield struct { + Value uint32 + Selector uint32 +} + +func (a *Uint32Bitfield) Serialize() []byte { + return (*(*[SizeofUint32Bitfield]byte)(unsafe.Pointer(a)))[:] +} + +func DeserializeUint32Bitfield(data []byte) *Uint32Bitfield { + return (*Uint32Bitfield)(unsafe.Pointer(&data[0:SizeofUint32Bitfield][0])) +} + +type Uint32Attribute struct { + Type uint16 + Value uint32 +} + +func (a *Uint32Attribute) Serialize() []byte { + native := NativeEndian() + buf := make([]byte, rtaAlignOf(8)) + native.PutUint16(buf[0:2], 8) + native.PutUint16(buf[2:4], a.Type) + + if a.Type&NLA_F_NET_BYTEORDER != 0 { + binary.BigEndian.PutUint32(buf[4:], a.Value) + } else { + native.PutUint32(buf[4:], a.Value) + } + return buf +} + +func (a *Uint32Attribute) Len() int { + return 8 +} + // Extend RtAttr to handle data and children type RtAttr struct { unix.RtAttr @@ -381,10 +489,30 @@ func (req *NetlinkRequest) AddRawData(data []byte) { req.RawData = append(req.RawData, data...) } -// Execute the request against a the given sockType. +// Execute the request against the given sockType. // Returns a list of netlink messages in serialized format, optionally filtered // by resType. func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) { + var res [][]byte + err := req.ExecuteIter(sockType, resType, func(msg []byte) bool { + res = append(res, msg) + return true + }) + if err != nil { + return nil, err + } + return res, nil +} + +// ExecuteIter executes the request against the given sockType. +// Calls the provided callback func once for each netlink message. +// If the callback returns false, it is not called again, but +// the remaining messages are consumed/discarded. +// +// Thread safety: ExecuteIter holds a lock on the socket until +// it finishes iteration so the callback must not call back into +// the netlink API. +func (req *NetlinkRequest) ExecuteIter(sockType int, resType uint16, f func(msg []byte) bool) error { var ( s *NetlinkSocket err error @@ -401,8 +529,21 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro if s == nil { s, err = getNetlinkSocket(sockType) if err != nil { - return nil, err + return err } + + if err := s.SetSendTimeout(&SocketTimeoutTv); err != nil { + return err + } + if err := s.SetReceiveTimeout(&SocketTimeoutTv); err != nil { + return err + } + if EnableErrorMessageReporting { + if err := s.SetExtAck(true); err != nil { + return err + } + } + defer s.Close() } else { s.Lock() @@ -410,56 +551,94 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro } if err := s.Send(req); err != nil { - return nil, err + return err } pid, err := s.GetPid() if err != nil { - return nil, err + return err } - var res [][]byte - done: for { msgs, from, err := s.Receive() if err != nil { - return nil, err + return err } if from.Pid != PidKernel { - return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel) + return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel) } for _, m := range msgs { if m.Header.Seq != req.Seq { if sharedSocket { continue } - return nil, fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq) + return fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq) } if m.Header.Pid != pid { continue } - if m.Header.Type == unix.NLMSG_DONE { - break done + + if m.Header.Flags&unix.NLM_F_DUMP_INTR != 0 { + return syscall.Errno(unix.EINTR) } - if m.Header.Type == unix.NLMSG_ERROR { + + if m.Header.Type == unix.NLMSG_DONE || m.Header.Type == unix.NLMSG_ERROR { + // NLMSG_DONE might have no payload, if so assume no error. + if m.Header.Type == unix.NLMSG_DONE && len(m.Data) == 0 { + break done + } + native := NativeEndian() - error := int32(native.Uint32(m.Data[0:4])) - if error == 0 { + errno := int32(native.Uint32(m.Data[0:4])) + if errno == 0 { break done } - return nil, syscall.Errno(-error) + var err error + err = syscall.Errno(-errno) + + unreadData := m.Data[4:] + if m.Header.Flags&unix.NLM_F_ACK_TLVS != 0 && len(unreadData) > syscall.SizeofNlMsghdr { + // Skip the echoed request message. + echoReqH := (*syscall.NlMsghdr)(unsafe.Pointer(&unreadData[0])) + unreadData = unreadData[nlmAlignOf(int(echoReqH.Len)):] + + // Annotate `err` using nlmsgerr attributes. + for len(unreadData) >= syscall.SizeofRtAttr { + attr := (*syscall.RtAttr)(unsafe.Pointer(&unreadData[0])) + attrData := unreadData[syscall.SizeofRtAttr:attr.Len] + + switch attr.Type { + case NLMSGERR_ATTR_MSG: + err = fmt.Errorf("%w: %s", err, unix.ByteSliceToString(attrData)) + default: + // TODO: handle other NLMSGERR_ATTR types + } + + unreadData = unreadData[rtaAlignOf(int(attr.Len)):] + } + } + + return err } if resType != 0 && m.Header.Type != resType { continue } - res = append(res, m.Data) + if cont := f(m.Data); !cont { + // Drain the rest of the messages from the kernel but don't + // pass them to the iterator func. + f = dummyMsgIterFunc + } if m.Header.Flags&unix.NLM_F_MULTI == 0 { break done } } } - return res, nil + return nil +} + +func dummyMsgIterFunc(msg []byte) bool { + return true } // Create a new netlink request from proto and flags @@ -477,8 +656,9 @@ func NewNetlinkRequest(proto, flags int) *NetlinkRequest { } type NetlinkSocket struct { - fd int32 - lsa unix.SockaddrNetlink + fd int32 + file *os.File + lsa unix.SockaddrNetlink sync.Mutex } @@ -487,8 +667,13 @@ func getNetlinkSocket(protocol int) (*NetlinkSocket, error) { if err != nil { return nil, err } + err = unix.SetNonblock(fd, true) + if err != nil { + return nil, err + } s := &NetlinkSocket{ - fd: int32(fd), + fd: int32(fd), + file: os.NewFile(uintptr(fd), "netlink"), } s.lsa.Family = unix.AF_NETLINK if err := unix.Bind(fd, &s.lsa); err != nil { @@ -519,12 +704,14 @@ func GetNetlinkSocketAt(newNs, curNs netns.NsHandle, protocol int) (*NetlinkSock // In case of success, the caller is expected to execute the returned function // at the end of the code that needs to be executed in the network namespace. // Example: -// func jobAt(...) error { -// d, err := executeInNetns(...) -// if err != nil { return err} -// defer d() -// < code which needs to be executed in specific netns> -// } +// +// func jobAt(...) error { +// d, err := executeInNetns(...) +// if err != nil { return err} +// defer d() +// < code which needs to be executed in specific netns> +// } +// // TODO: his function probably belongs to netns pkg. func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) { var ( @@ -573,8 +760,13 @@ func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) { if err != nil { return nil, err } + err = unix.SetNonblock(fd, true) + if err != nil { + return nil, err + } s := &NetlinkSocket{ - fd: int32(fd), + fd: int32(fd), + file: os.NewFile(uintptr(fd), "netlink"), } s.lsa.Family = unix.AF_NETLINK @@ -603,33 +795,36 @@ func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*Ne } func (s *NetlinkSocket) Close() { - fd := int(atomic.SwapInt32(&s.fd, -1)) - unix.Close(fd) + s.file.Close() } func (s *NetlinkSocket) GetFd() int { - return int(atomic.LoadInt32(&s.fd)) + return int(s.fd) } func (s *NetlinkSocket) Send(request *NetlinkRequest) error { - fd := int(atomic.LoadInt32(&s.fd)) - if fd < 0 { - return fmt.Errorf("Send called on a closed socket") - } - if err := unix.Sendto(fd, request.Serialize(), 0, &s.lsa); err != nil { - return err - } - return nil + return unix.Sendto(int(s.fd), request.Serialize(), 0, &s.lsa) } func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetlink, error) { - fd := int(atomic.LoadInt32(&s.fd)) - if fd < 0 { - return nil, nil, fmt.Errorf("Receive called on a closed socket") + rawConn, err := s.file.SyscallConn() + if err != nil { + return nil, nil, err + } + var ( + fromAddr *unix.SockaddrNetlink + rb [RECEIVE_BUFFER_SIZE]byte + nr int + from unix.Sockaddr + innerErr error + ) + err = rawConn.Read(func(fd uintptr) (done bool) { + nr, from, innerErr = unix.Recvfrom(int(fd), rb[:], 0) + return innerErr != unix.EWOULDBLOCK + }) + if innerErr != nil { + err = innerErr } - var fromAddr *unix.SockaddrNetlink - var rb [RECEIVE_BUFFER_SIZE]byte - nr, from, err := unix.Recvfrom(fd, rb[:], 0) if err != nil { return nil, nil, err } @@ -640,8 +835,9 @@ func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetli if nr < unix.NLMSG_HDRLEN { return nil, nil, fmt.Errorf("Got short response from netlink") } - rb2 := make([]byte, nr) - copy(rb2, rb[:nr]) + msgLen := nlmAlignOf(nr) + rb2 := make([]byte, msgLen) + copy(rb2, rb[:msgLen]) nl, err := syscall.ParseNetlinkMessage(rb2) if err != nil { return nil, nil, err @@ -663,9 +859,27 @@ func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error { return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_RCVTIMEO, timeout) } +// SetReceiveBufferSize allows to set a receive buffer size on the socket +func (s *NetlinkSocket) SetReceiveBufferSize(size int, force bool) error { + opt := unix.SO_RCVBUF + if force { + opt = unix.SO_RCVBUFFORCE + } + return unix.SetsockoptInt(int(s.fd), unix.SOL_SOCKET, opt, size) +} + +// SetExtAck requests error messages to be reported on the socket +func (s *NetlinkSocket) SetExtAck(enable bool) error { + var enableN int + if enable { + enableN = 1 + } + + return unix.SetsockoptInt(int(s.fd), unix.SOL_NETLINK, unix.NETLINK_EXT_ACK, enableN) +} + func (s *NetlinkSocket) GetPid() (uint32, error) { - fd := int(atomic.LoadInt32(&s.fd)) - lsa, err := unix.Getsockname(fd) + lsa, err := unix.Getsockname(int(s.fd)) if err != nil { return 0, err } @@ -709,6 +923,12 @@ func Uint16Attr(v uint16) []byte { return bytes } +func BEUint16Attr(v uint16) []byte { + bytes := make([]byte, 2) + binary.BigEndian.PutUint16(bytes, v) + return bytes +} + func Uint32Attr(v uint32) []byte { native := NativeEndian() bytes := make([]byte, 4) @@ -716,6 +936,12 @@ func Uint32Attr(v uint32) []byte { return bytes } +func BEUint32Attr(v uint32) []byte { + bytes := make([]byte, 4) + binary.BigEndian.PutUint32(bytes, v) + return bytes +} + func Uint64Attr(v uint64) []byte { native := NativeEndian() bytes := make([]byte, 8) @@ -723,6 +949,12 @@ func Uint64Attr(v uint64) []byte { return bytes } +func BEUint64Attr(v uint64) []byte { + bytes := make([]byte, 8) + binary.BigEndian.PutUint64(bytes, v) + return bytes +} + func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) { var attrs []syscall.NetlinkRouteAttr for len(b) >= unix.SizeofRtAttr { @@ -737,6 +969,22 @@ func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) { return attrs, nil } +// ParseRouteAttrAsMap parses provided buffer that contains raw RtAttrs and returns a map of parsed +// atttributes indexed by attribute type or error if occured. +func ParseRouteAttrAsMap(b []byte) (map[uint16]syscall.NetlinkRouteAttr, error) { + attrMap := make(map[uint16]syscall.NetlinkRouteAttr) + + attrs, err := ParseRouteAttr(b) + if err != nil { + return nil, err + } + + for _, attr := range attrs { + attrMap[attr.Attr.Type] = attr + } + return attrMap, nil +} + func netlinkRouteAttrAndValue(b []byte) (*unix.RtAttr, []byte, int, error) { a := (*unix.RtAttr)(unsafe.Pointer(&b[0])) if int(a.Len) < unix.SizeofRtAttr || int(a.Len) > len(b) { diff --git a/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go b/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go new file mode 100644 index 00000000000..7f49125cffa --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go @@ -0,0 +1,79 @@ +package nl + +import ( + "encoding/binary" + "fmt" + "log" +) + +type Attribute struct { + Type uint16 + Value []byte +} + +func ParseAttributes(data []byte) <-chan Attribute { + native := NativeEndian() + result := make(chan Attribute) + + go func() { + i := 0 + for i+4 < len(data) { + length := int(native.Uint16(data[i : i+2])) + attrType := native.Uint16(data[i+2 : i+4]) + + if length < 4 { + log.Printf("attribute 0x%02x has invalid length of %d bytes", attrType, length) + break + } + + if len(data) < i+length { + log.Printf("attribute 0x%02x of length %d is truncated, only %d bytes remaining", attrType, length, len(data)-i) + break + } + + result <- Attribute{ + Type: attrType, + Value: data[i+4 : i+length], + } + i += rtaAlignOf(length) + } + close(result) + }() + + return result +} + +func PrintAttributes(data []byte) { + printAttributes(data, 0) +} + +func printAttributes(data []byte, level int) { + for attr := range ParseAttributes(data) { + for i := 0; i < level; i++ { + print("> ") + } + nested := attr.Type&NLA_F_NESTED != 0 + fmt.Printf("type=%d nested=%v len=%v %v\n", attr.Type&NLA_TYPE_MASK, nested, len(attr.Value), attr.Value) + if nested { + printAttributes(attr.Value, level+1) + } + } +} + +// Uint32 returns the uint32 value respecting the NET_BYTEORDER flag +func (attr *Attribute) Uint32() uint32 { + if attr.Type&NLA_F_NET_BYTEORDER != 0 { + return binary.BigEndian.Uint32(attr.Value) + } else { + return NativeEndian().Uint32(attr.Value) + } +} + +// Uint64 returns the uint64 value respecting the NET_BYTEORDER flag +func (attr *Attribute) Uint64() uint64 { + if attr.Type&NLA_F_NET_BYTEORDER != 0 { + return binary.BigEndian.Uint64(attr.Value) + } else { + return NativeEndian().Uint64(attr.Value) + } +} diff --git a/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go b/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go index 1224b747def..ce43ee1550c 100644 --- a/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go @@ -11,6 +11,8 @@ const ( const ( RDMA_NLDEV_CMD_GET = 1 RDMA_NLDEV_CMD_SET = 2 + RDMA_NLDEV_CMD_NEWLINK = 3 + RDMA_NLDEV_CMD_DELLINK = 4 RDMA_NLDEV_CMD_SYS_GET = 6 RDMA_NLDEV_CMD_SYS_SET = 7 ) @@ -30,6 +32,8 @@ const ( RDMA_NLDEV_ATTR_PORT_STATE = 12 RDMA_NLDEV_ATTR_PORT_PHYS_STATE = 13 RDMA_NLDEV_ATTR_DEV_NODE_TYPE = 14 + RDMA_NLDEV_ATTR_NDEV_NAME = 51 + RDMA_NLDEV_ATTR_LINK_TYPE = 65 RDMA_NLDEV_SYS_ATTR_NETNS_MODE = 66 RDMA_NLDEV_NET_NS_FD = 68 ) diff --git a/vendor/github.com/vishvananda/netlink/nl/route_linux.go b/vendor/github.com/vishvananda/netlink/nl/route_linux.go index 03c1900ffa8..c26f3bf91ae 100644 --- a/vendor/github.com/vishvananda/netlink/nl/route_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/route_linux.go @@ -48,7 +48,9 @@ type RtNexthop struct { } func DeserializeRtNexthop(b []byte) *RtNexthop { - return (*RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0])) + return &RtNexthop{ + RtNexthop: *((*unix.RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0]))), + } } func (msg *RtNexthop) Len() int { diff --git a/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go b/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go index 5774cbb15ae..fe88285f20e 100644 --- a/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go @@ -23,7 +23,7 @@ func (s1 *IPv6SrHdr) Equal(s2 IPv6SrHdr) bool { return false } for i := range s1.Segments { - if s1.Segments[i].Equal(s2.Segments[i]) != true { + if !s1.Segments[i].Equal(s2.Segments[i]) { return false } } @@ -89,7 +89,7 @@ func DecodeSEG6Encap(buf []byte) (int, []net.IP, error) { } buf = buf[12:] if len(buf)%16 != 0 { - err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)\n", len(buf)) + err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)", len(buf)) return mode, nil, err } for len(buf) > 0 { diff --git a/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go b/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go index 1500177267a..8172b8471f2 100644 --- a/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go @@ -12,6 +12,7 @@ const ( SEG6_LOCAL_NH6 SEG6_LOCAL_IIF SEG6_LOCAL_OIF + SEG6_LOCAL_BPF __SEG6_LOCAL_MAX ) const ( @@ -34,6 +35,7 @@ const ( SEG6_LOCAL_ACTION_END_S // 12 SEG6_LOCAL_ACTION_END_AS // 13 SEG6_LOCAL_ACTION_END_AM // 14 + SEG6_LOCAL_ACTION_END_BPF // 15 __SEG6_LOCAL_ACTION_MAX ) const ( @@ -71,6 +73,8 @@ func SEG6LocalActionString(action int) string { return "End.AS" case SEG6_LOCAL_ACTION_END_AM: return "End.AM" + case SEG6_LOCAL_ACTION_END_BPF: + return "End.BPF" } return "unknown" } diff --git a/vendor/github.com/vishvananda/netlink/nl/syscall.go b/vendor/github.com/vishvananda/netlink/nl/syscall.go index f7f7f92e6fa..b5ba039acb3 100644 --- a/vendor/github.com/vishvananda/netlink/nl/syscall.go +++ b/vendor/github.com/vishvananda/netlink/nl/syscall.go @@ -1,6 +1,6 @@ package nl -// syscall package lack of rule atributes type. +// syscall package lack of rule attributes type. // Thus there are defined below const ( FRA_UNSPEC = iota @@ -21,6 +21,13 @@ const ( FRA_TABLE /* Extended table id */ FRA_FWMASK /* mask for netfilter mark */ FRA_OIFNAME + FRA_PAD + FRA_L3MDEV /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE /* UID range */ + FRA_PROTOCOL /* Originator of the rule */ + FRA_IP_PROTO /* ip proto */ + FRA_SPORT_RANGE /* sport */ + FRA_DPORT_RANGE /* dport */ ) // ip rule netlink request types @@ -39,6 +46,7 @@ const ( // socket diags related const ( SOCK_DIAG_BY_FAMILY = 20 /* linux.sock_diag.h */ + SOCK_DESTROY = 21 TCPDIAG_NOCOOKIE = 0xFFFFFFFF /* TCPDIAG_NOCOOKIE in net/ipv4/tcp_diag.h*/ ) diff --git a/vendor/github.com/vishvananda/netlink/nl/tc_linux.go b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go index 501f554b216..0720729a900 100644 --- a/vendor/github.com/vishvananda/netlink/nl/tc_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go @@ -1,8 +1,13 @@ package nl import ( + "bytes" "encoding/binary" + "fmt" + "net" "unsafe" + + "golang.org/x/sys/unix" ) // LinkLayer @@ -42,7 +47,14 @@ const ( TCA_FCNT TCA_STATS2 TCA_STAB - TCA_MAX = TCA_STAB + TCA_PAD + TCA_DUMP_INVISIBLE + TCA_CHAIN + TCA_HW_OFFLOAD + TCA_INGRESS_BLOCK + TCA_EGRESS_BLOCK + TCA_DUMP_FLAGS + TCA_MAX = TCA_DUMP_FLAGS ) const ( @@ -56,6 +68,12 @@ const ( TCA_ACT_OPTIONS TCA_ACT_INDEX TCA_ACT_STATS + TCA_ACT_PAD + TCA_ACT_COOKIE + TCA_ACT_FLAGS + TCA_ACT_HW_STATS + TCA_ACT_USED_HW_STATS + TCA_ACT_IN_HW_COUNT TCA_ACT_MAX ) @@ -71,7 +89,11 @@ const ( TCA_STATS_RATE_EST TCA_STATS_QUEUE TCA_STATS_APP - TCA_STATS_MAX = TCA_STATS_APP + TCA_STATS_RATE_EST64 + TCA_STATS_PAD + TCA_STATS_BASIC_HW + TCA_STATS_PKT64 + TCA_STATS_MAX = TCA_STATS_PKT64 ) const ( @@ -83,17 +105,23 @@ const ( SizeofTcNetemCorr = 0x0c SizeofTcNetemReorder = 0x08 SizeofTcNetemCorrupt = 0x08 + SizeOfTcNetemRate = 0x10 SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c SizeofTcHtbCopt = 2*SizeofTcRateSpec + 0x14 SizeofTcHtbGlob = 0x14 SizeofTcU32Key = 0x10 SizeofTcU32Sel = 0x10 // without keys - SizeofTcGen = 0x14 + SizeofTcGen = 0x16 SizeofTcConnmark = SizeofTcGen + 0x04 + SizeofTcCsum = SizeofTcGen + 0x04 SizeofTcMirred = SizeofTcGen + 0x08 SizeofTcTunnelKey = SizeofTcGen + 0x04 SizeofTcSkbEdit = SizeofTcGen SizeofTcPolice = 2*SizeofTcRateSpec + 0x20 + SizeofTcSfqQopt = 0x0b + SizeofTcSfqRedStats = 0x18 + SizeofTcSfqQoptV1 = SizeofTcSfqQopt + SizeofTcSfqRedStats + 0x1c + SizeofUint32Bitfield = 0x8 ) // struct tcmsg { @@ -127,6 +155,18 @@ func (x *TcMsg) Serialize() []byte { return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:] } +type Tcf struct { + Install uint64 + LastUse uint64 + Expires uint64 + FirstUse uint64 +} + +func DeserializeTcf(b []byte) *Tcf { + const size = int(unsafe.Sizeof(Tcf{})) + return (*Tcf)(unsafe.Pointer(&b[0:size][0])) +} + // struct tcamsg { // unsigned char tca_family; // unsigned char tca__pad1; @@ -333,6 +373,26 @@ func (x *TcNetemCorrupt) Serialize() []byte { return (*(*[SizeofTcNetemCorrupt]byte)(unsafe.Pointer(x)))[:] } +// TcNetemRate is a struct that represents the rate of a netem qdisc +type TcNetemRate struct { + Rate uint32 + PacketOverhead int32 + CellSize uint32 + CellOverhead int32 +} + +func (msg *TcNetemRate) Len() int { + return SizeofTcRateSpec +} + +func DeserializeTcNetemRate(b []byte) *TcNetemRate { + return (*TcNetemRate)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0])) +} + +func (msg *TcNetemRate) Serialize() []byte { + return (*(*[SizeOfTcNetemRate]byte)(unsafe.Pointer(msg)))[:] +} + // struct tc_tbf_qopt { // struct tc_ratespec rate; // struct tc_ratespec peakrate; @@ -691,6 +751,36 @@ func (x *TcConnmark) Serialize() []byte { return (*(*[SizeofTcConnmark]byte)(unsafe.Pointer(x)))[:] } +const ( + TCA_CSUM_UNSPEC = iota + TCA_CSUM_PARMS + TCA_CSUM_TM + TCA_CSUM_PAD + TCA_CSUM_MAX = TCA_CSUM_PAD +) + +// struct tc_csum { +// tc_gen; +// __u32 update_flags; +// } + +type TcCsum struct { + TcGen + UpdateFlags uint32 +} + +func (msg *TcCsum) Len() int { + return SizeofTcCsum +} + +func DeserializeTcCsum(b []byte) *TcCsum { + return (*TcCsum)(unsafe.Pointer(&b[0:SizeofTcCsum][0])) +} + +func (x *TcCsum) Serialize() []byte { + return (*(*[SizeofTcCsum]byte)(unsafe.Pointer(x)))[:] +} + const ( TCA_ACT_MIRRED = 8 ) @@ -735,7 +825,13 @@ const ( TCA_TUNNEL_KEY_ENC_IPV6_SRC TCA_TUNNEL_KEY_ENC_IPV6_DST TCA_TUNNEL_KEY_ENC_KEY_ID - TCA_TUNNEL_KEY_MAX = TCA_TUNNEL_KEY_ENC_KEY_ID + TCA_TUNNEL_KEY_PAD + TCA_TUNNEL_KEY_ENC_DST_PORT + TCA_TUNNEL_KEY_NO_CSUM + TCA_TUNNEL_KEY_ENC_OPTS + TCA_TUNNEL_KEY_ENC_TOS + TCA_TUNNEL_KEY_ENC_TTL + TCA_TUNNEL_KEY_MAX ) type TcTunnelKey struct { @@ -764,7 +860,8 @@ const ( TCA_SKBEDIT_MARK TCA_SKBEDIT_PAD TCA_SKBEDIT_PTYPE - TCA_SKBEDIT_MAX = TCA_SKBEDIT_MARK + TCA_SKBEDIT_MASK + TCA_SKBEDIT_MAX ) type TcSkbEdit struct { @@ -851,6 +948,10 @@ const ( TCA_FQ_FLOW_REFILL_DELAY // flow credit refill delay in usec TCA_FQ_ORPHAN_MASK // mask applied to orphaned skb hashes TCA_FQ_LOW_RATE_THRESHOLD // per packet delay under this rate + TCA_FQ_CE_THRESHOLD // DCTCP-like CE-marking threshold + TCA_FQ_TIMER_SLACK // timer slack + TCA_FQ_HORIZON // time horizon in us + TCA_FQ_HORIZON_DROP // drop packets beyond horizon, or cap their EDT ) const ( @@ -872,3 +973,639 @@ const ( TCA_HFSC_FSC TCA_HFSC_USC ) + +const ( + TCA_FLOWER_UNSPEC = iota + TCA_FLOWER_CLASSID + TCA_FLOWER_INDEV + TCA_FLOWER_ACT + TCA_FLOWER_KEY_ETH_DST /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE /* be16 */ + TCA_FLOWER_KEY_IP_PROTO /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK /* be32 */ + TCA_FLOWER_KEY_IPV4_DST /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC /* be16 */ + TCA_FLOWER_KEY_TCP_DST /* be16 */ + TCA_FLOWER_KEY_UDP_SRC /* be16 */ + TCA_FLOWER_KEY_UDP_DST /* be16 */ + + TCA_FLOWER_FLAGS + TCA_FLOWER_KEY_VLAN_ID /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE /* be16 */ + + TCA_FLOWER_KEY_ENC_KEY_ID /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK /* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK /* struct in6_addr */ + + TCA_FLOWER_KEY_TCP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC /* be16 */ + TCA_FLOWER_KEY_SCTP_DST /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK /* be16 */ + + TCA_FLOWER_KEY_FLAGS /* be32 */ + TCA_FLOWER_KEY_FLAGS_MASK /* be32 */ + + TCA_FLOWER_KEY_ICMPV4_CODE /* u8 */ + TCA_FLOWER_KEY_ICMPV4_CODE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK /* u8 */ + + TCA_FLOWER_KEY_ARP_SIP /* be32 */ + TCA_FLOWER_KEY_ARP_SIP_MASK /* be32 */ + TCA_FLOWER_KEY_ARP_TIP /* be32 */ + TCA_FLOWER_KEY_ARP_TIP_MASK /* be32 */ + TCA_FLOWER_KEY_ARP_OP /* u8 */ + TCA_FLOWER_KEY_ARP_OP_MASK /* u8 */ + TCA_FLOWER_KEY_ARP_SHA /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_SHA_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA_MASK /* ETH_ALEN */ + + TCA_FLOWER_KEY_MPLS_TTL /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL /* be32 - 20 bits */ + + TCA_FLOWER_KEY_TCP_FLAGS /* be16 */ + TCA_FLOWER_KEY_TCP_FLAGS_MASK /* be16 */ + + TCA_FLOWER_KEY_IP_TOS /* u8 */ + TCA_FLOWER_KEY_IP_TOS_MASK /* u8 */ + TCA_FLOWER_KEY_IP_TTL /* u8 */ + TCA_FLOWER_KEY_IP_TTL_MASK /* u8 */ + + TCA_FLOWER_KEY_CVLAN_ID /* be16 */ + TCA_FLOWER_KEY_CVLAN_PRIO /* u8 */ + TCA_FLOWER_KEY_CVLAN_ETH_TYPE /* be16 */ + + TCA_FLOWER_KEY_ENC_IP_TOS /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TOS_MASK /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL_MASK /* u8 */ + + TCA_FLOWER_KEY_ENC_OPTS + TCA_FLOWER_KEY_ENC_OPTS_MASK + + __TCA_FLOWER_MAX +) + +const TCA_CLS_FLAGS_SKIP_HW = 1 << 0 /* don't offload filter to HW */ +const TCA_CLS_FLAGS_SKIP_SW = 1 << 1 /* don't use filter in SW */ + +// struct tc_sfq_qopt { +// unsigned quantum; /* Bytes per round allocated to flow */ +// int perturb_period; /* Period of hash perturbation */ +// __u32 limit; /* Maximal packets in queue */ +// unsigned divisor; /* Hash divisor */ +// unsigned flows; /* Maximal number of flows */ +// }; + +type TcSfqQopt struct { + Quantum uint8 + Perturb int32 + Limit uint32 + Divisor uint8 + Flows uint8 +} + +func (x *TcSfqQopt) Len() int { + return SizeofTcSfqQopt +} + +func DeserializeTcSfqQopt(b []byte) *TcSfqQopt { + return (*TcSfqQopt)(unsafe.Pointer(&b[0:SizeofTcSfqQopt][0])) +} + +func (x *TcSfqQopt) Serialize() []byte { + return (*(*[SizeofTcSfqQopt]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_sfqred_stats { +// __u32 prob_drop; /* Early drops, below max threshold */ +// __u32 forced_drop; /* Early drops, after max threshold */ +// __u32 prob_mark; /* Marked packets, below max threshold */ +// __u32 forced_mark; /* Marked packets, after max threshold */ +// __u32 prob_mark_head; /* Marked packets, below max threshold */ +// __u32 forced_mark_head;/* Marked packets, after max threshold */ +// }; +type TcSfqRedStats struct { + ProbDrop uint32 + ForcedDrop uint32 + ProbMark uint32 + ForcedMark uint32 + ProbMarkHead uint32 + ForcedMarkHead uint32 +} + +func (x *TcSfqRedStats) Len() int { + return SizeofTcSfqRedStats +} + +func DeserializeTcSfqRedStats(b []byte) *TcSfqRedStats { + return (*TcSfqRedStats)(unsafe.Pointer(&b[0:SizeofTcSfqRedStats][0])) +} + +func (x *TcSfqRedStats) Serialize() []byte { + return (*(*[SizeofTcSfqRedStats]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_sfq_qopt_v1 { +// struct tc_sfq_qopt v0; +// unsigned int depth; /* max number of packets per flow */ +// unsigned int headdrop; +// +// /* SFQRED parameters */ +// +// __u32 limit; /* HARD maximal flow queue length (bytes) */ +// __u32 qth_min; /* Min average length threshold (bytes) */ +// __u32 qth_max; /* Max average length threshold (bytes) */ +// unsigned char Wlog; /* log(W) */ +// unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ +// unsigned char Scell_log; /* cell size for idle damping */ +// unsigned char flags; +// __u32 max_P; /* probability, high resolution */ +// +// /* SFQRED stats */ +// +// struct tc_sfqred_stats stats; +// }; +type TcSfqQoptV1 struct { + TcSfqQopt + Depth uint32 + HeadDrop uint32 + Limit uint32 + QthMin uint32 + QthMax uint32 + Wlog byte + Plog byte + ScellLog byte + Flags byte + MaxP uint32 + TcSfqRedStats +} + +func (x *TcSfqQoptV1) Len() int { + return SizeofTcSfqQoptV1 +} + +func DeserializeTcSfqQoptV1(b []byte) *TcSfqQoptV1 { + return (*TcSfqQoptV1)(unsafe.Pointer(&b[0:SizeofTcSfqQoptV1][0])) +} + +func (x *TcSfqQoptV1) Serialize() []byte { + return (*(*[SizeofTcSfqQoptV1]byte)(unsafe.Pointer(x)))[:] +} + +// IPProto represents Flower ip_proto attribute +type IPProto uint8 + +const ( + IPPROTO_TCP IPProto = unix.IPPROTO_TCP + IPPROTO_UDP IPProto = unix.IPPROTO_UDP + IPPROTO_SCTP IPProto = unix.IPPROTO_SCTP + IPPROTO_ICMP IPProto = unix.IPPROTO_ICMP + IPPROTO_ICMPV6 IPProto = unix.IPPROTO_ICMPV6 +) + +func (i IPProto) Serialize() []byte { + arr := make([]byte, 1) + arr[0] = byte(i) + return arr +} + +func (i IPProto) String() string { + switch i { + case IPPROTO_TCP: + return "tcp" + case IPPROTO_UDP: + return "udp" + case IPPROTO_SCTP: + return "sctp" + case IPPROTO_ICMP: + return "icmp" + case IPPROTO_ICMPV6: + return "icmpv6" + } + return fmt.Sprintf("%d", i) +} + +const ( + MaxOffs = 128 + SizeOfPeditSel = 24 + SizeOfPeditKey = 24 + + TCA_PEDIT_KEY_EX_HTYPE = 1 + TCA_PEDIT_KEY_EX_CMD = 2 +) + +const ( + TCA_PEDIT_UNSPEC = iota + TCA_PEDIT_TM + TCA_PEDIT_PARMS + TCA_PEDIT_PAD + TCA_PEDIT_PARMS_EX + TCA_PEDIT_KEYS_EX + TCA_PEDIT_KEY_EX +) + +// /* TCA_PEDIT_KEY_EX_HDR_TYPE_NETWROK is a special case for legacy users. It +// * means no specific header type - offset is relative to the network layer +// */ +type PeditHeaderType uint16 + +const ( + TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = iota + TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 + TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + TCA_PEDIT_KEY_EX_HDR_TYPE_TCP + TCA_PEDIT_KEY_EX_HDR_TYPE_UDP + __PEDIT_HDR_TYPE_MAX +) + +type PeditCmd uint16 + +const ( + TCA_PEDIT_KEY_EX_CMD_SET = 0 + TCA_PEDIT_KEY_EX_CMD_ADD = 1 +) + +type TcPeditSel struct { + TcGen + NKeys uint8 + Flags uint8 +} + +func DeserializeTcPeditKey(b []byte) *TcPeditKey { + return (*TcPeditKey)(unsafe.Pointer(&b[0:SizeOfPeditKey][0])) +} + +func DeserializeTcPedit(b []byte) (*TcPeditSel, []TcPeditKey) { + x := &TcPeditSel{} + copy((*(*[SizeOfPeditSel]byte)(unsafe.Pointer(x)))[:SizeOfPeditSel], b) + + var keys []TcPeditKey + + next := SizeOfPeditKey + var i uint8 + for i = 0; i < x.NKeys; i++ { + keys = append(keys, *DeserializeTcPeditKey(b[next:])) + next += SizeOfPeditKey + } + + return x, keys +} + +type TcPeditKey struct { + Mask uint32 + Val uint32 + Off uint32 + At uint32 + OffMask uint32 + Shift uint32 +} + +type TcPeditKeyEx struct { + HeaderType PeditHeaderType + Cmd PeditCmd +} + +type TcPedit struct { + Sel TcPeditSel + Keys []TcPeditKey + KeysEx []TcPeditKeyEx + Extend uint8 +} + +func (p *TcPedit) Encode(parent *RtAttr) { + parent.AddRtAttr(TCA_ACT_KIND, ZeroTerminated("pedit")) + actOpts := parent.AddRtAttr(TCA_ACT_OPTIONS, nil) + + bbuf := bytes.NewBuffer(make([]byte, 0, int(unsafe.Sizeof(p.Sel)+unsafe.Sizeof(p.Keys)))) + + bbuf.Write((*(*[SizeOfPeditSel]byte)(unsafe.Pointer(&p.Sel)))[:]) + + for i := uint8(0); i < p.Sel.NKeys; i++ { + bbuf.Write((*(*[SizeOfPeditKey]byte)(unsafe.Pointer(&p.Keys[i])))[:]) + } + actOpts.AddRtAttr(TCA_PEDIT_PARMS_EX, bbuf.Bytes()) + + exAttrs := actOpts.AddRtAttr(int(TCA_PEDIT_KEYS_EX|NLA_F_NESTED), nil) + for i := uint8(0); i < p.Sel.NKeys; i++ { + keyAttr := exAttrs.AddRtAttr(int(TCA_PEDIT_KEY_EX|NLA_F_NESTED), nil) + + htypeBuf := make([]byte, 2) + cmdBuf := make([]byte, 2) + + NativeEndian().PutUint16(htypeBuf, uint16(p.KeysEx[i].HeaderType)) + NativeEndian().PutUint16(cmdBuf, uint16(p.KeysEx[i].Cmd)) + + keyAttr.AddRtAttr(TCA_PEDIT_KEY_EX_HTYPE, htypeBuf) + keyAttr.AddRtAttr(TCA_PEDIT_KEY_EX_CMD, cmdBuf) + } +} + +func (p *TcPedit) SetEthDst(mac net.HardwareAddr) { + u32 := NativeEndian().Uint32(mac) + u16 := NativeEndian().Uint16(mac[4:]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = uint32(u16) + tKey.Mask = 0xffff0000 + tKey.Off = 4 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetEthSrc(mac net.HardwareAddr) { + u16 := NativeEndian().Uint16(mac) + u32 := NativeEndian().Uint32(mac[2:]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = uint32(u16) << 16 + tKey.Mask = 0x0000ffff + tKey.Off = 4 + + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Mask = 0 + tKey.Off = 8 + + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetIPv6Src(ip6 net.IP) { + u32 := NativeEndian().Uint32(ip6[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 8 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[4:8]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 12 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[8:12]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 16 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[12:16]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 20 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetDstIP(ip net.IP) { + if ip.To4() != nil { + p.SetIPv4Dst(ip) + } else { + p.SetIPv6Dst(ip) + } +} + +func (p *TcPedit) SetSrcIP(ip net.IP) { + if ip.To4() != nil { + p.SetIPv4Src(ip) + } else { + p.SetIPv6Src(ip) + } +} + +func (p *TcPedit) SetIPv6Dst(ip6 net.IP) { + u32 := NativeEndian().Uint32(ip6[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 24 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[4:8]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 28 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[8:12]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 32 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ + + u32 = NativeEndian().Uint32(ip6[12:16]) + tKey = TcPeditKey{} + tKeyEx = TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 36 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + + p.Sel.NKeys++ +} + +func (p *TcPedit) SetIPv4Src(ip net.IP) { + u32 := NativeEndian().Uint32(ip[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 12 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} + +func (p *TcPedit) SetIPv4Dst(ip net.IP) { + u32 := NativeEndian().Uint32(ip[:4]) + + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + tKey.Val = u32 + tKey.Off = 16 + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} + +// SetDstPort only tcp and udp are supported to set port +func (p *TcPedit) SetDstPort(dstPort uint16, protocol uint8) { + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + switch protocol { + case unix.IPPROTO_TCP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP + case unix.IPPROTO_UDP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP + default: + return + } + + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + tKey.Val = uint32(Swap16(dstPort)) << 16 + tKey.Mask = 0x0000ffff + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} + +// SetSrcPort only tcp and udp are supported to set port +func (p *TcPedit) SetSrcPort(srcPort uint16, protocol uint8) { + tKey := TcPeditKey{} + tKeyEx := TcPeditKeyEx{} + + switch protocol { + case unix.IPPROTO_TCP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP + case unix.IPPROTO_UDP: + tKeyEx.HeaderType = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP + default: + return + } + + tKeyEx.Cmd = TCA_PEDIT_KEY_EX_CMD_SET + + tKey.Val = uint32(Swap16(srcPort)) + tKey.Mask = 0xffff0000 + p.Keys = append(p.Keys, tKey) + p.KeysEx = append(p.KeysEx, tKeyEx) + p.Sel.NKeys++ +} diff --git a/vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go b/vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go new file mode 100644 index 00000000000..f209125df4a --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/vdpa_linux.go @@ -0,0 +1,41 @@ +package nl + +const ( + VDPA_GENL_NAME = "vdpa" + VDPA_GENL_VERSION = 0x1 +) + +const ( + VDPA_CMD_UNSPEC = iota + VDPA_CMD_MGMTDEV_NEW + VDPA_CMD_MGMTDEV_GET /* can dump */ + VDPA_CMD_DEV_NEW + VDPA_CMD_DEV_DEL + VDPA_CMD_DEV_GET /* can dump */ + VDPA_CMD_DEV_CONFIG_GET /* can dump */ + VDPA_CMD_DEV_VSTATS_GET +) + +const ( + VDPA_ATTR_UNSPEC = iota + VDPA_ATTR_MGMTDEV_BUS_NAME + VDPA_ATTR_MGMTDEV_DEV_NAME + VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES + VDPA_ATTR_DEV_NAME + VDPA_ATTR_DEV_ID + VDPA_ATTR_DEV_VENDOR_ID + VDPA_ATTR_DEV_MAX_VQS + VDPA_ATTR_DEV_MAX_VQ_SIZE + VDPA_ATTR_DEV_MIN_VQ_SIZE + VDPA_ATTR_DEV_NET_CFG_MACADDR + VDPA_ATTR_DEV_NET_STATUS + VDPA_ATTR_DEV_NET_CFG_MAX_VQP + VDPA_ATTR_DEV_NET_CFG_MTU + VDPA_ATTR_DEV_NEGOTIATED_FEATURES + VDPA_ATTR_DEV_MGMTDEV_MAX_VQS + VDPA_ATTR_DEV_SUPPORTED_FEATURES + VDPA_ATTR_DEV_QUEUE_INDEX + VDPA_ATTR_DEV_VENDOR_ATTR_NAME + VDPA_ATTR_DEV_VENDOR_ATTR_VALUE + VDPA_ATTR_DEV_FEATURES +) diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go index dce9073f7b5..cdb318ba557 100644 --- a/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go @@ -131,7 +131,15 @@ func (x *XfrmAddress) ToIP() net.IP { return ip } -func (x *XfrmAddress) ToIPNet(prefixlen uint8) *net.IPNet { +// family is only used when x and prefixlen are both 0 +func (x *XfrmAddress) ToIPNet(prefixlen uint8, family uint16) *net.IPNet { + empty := [SizeofXfrmAddress]byte{} + if bytes.Equal(x[:], empty[:]) && prefixlen == 0 { + if family == FAMILY_V6 { + return &net.IPNet{IP: net.ParseIP("::"), Mask: net.CIDRMask(int(prefixlen), 128)} + } + return &net.IPNet{IP: net.ParseIP("0.0.0.0"), Mask: net.CIDRMask(int(prefixlen), 32)} + } ip := x.ToIP() if GetIPFamily(ip) == FAMILY_V4 { return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 32)} diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go index b6290fd5442..e8920b9a69b 100644 --- a/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go @@ -13,8 +13,9 @@ const ( SizeofXfrmAlgoAuth = 0x48 SizeofXfrmAlgoAEAD = 0x48 SizeofXfrmEncapTmpl = 0x18 - SizeofXfrmUsersaFlush = 0x8 + SizeofXfrmUsersaFlush = 0x1 SizeofXfrmReplayStateEsn = 0x18 + SizeofXfrmReplayState = 0x0c ) const ( @@ -28,6 +29,11 @@ const ( XFRM_STATE_ESN = 128 ) +const ( + XFRM_SA_XFLAG_DONT_ENCAP_DSCP = 1 + XFRM_SA_XFLAG_OSEQ_MAY_WRAP = 2 +) + // struct xfrm_usersa_id { // xfrm_address_t daddr; // __be32 spi; @@ -103,6 +109,7 @@ func (msg *XfrmStats) Serialize() []byte { // }; // // #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 +// #define XFRM_SA_XFLAG_OSEQ_MAY_WRAP 2 // type XfrmUsersaInfo struct { @@ -332,3 +339,23 @@ func (msg *XfrmReplayStateEsn) Serialize() []byte { // We deliberately do not pass Bmp, as it gets set by the kernel. return (*(*[SizeofXfrmReplayStateEsn]byte)(unsafe.Pointer(msg)))[:] } + +// struct xfrm_replay_state { +// __u32 oseq; +// __u32 seq; +// __u32 bitmap; +// }; + +type XfrmReplayState struct { + OSeq uint32 + Seq uint32 + BitMap uint32 +} + +func DeserializeXfrmReplayState(b []byte) *XfrmReplayState { + return (*XfrmReplayState)(unsafe.Pointer(&b[0:SizeofXfrmReplayState][0])) +} + +func (msg *XfrmReplayState) Serialize() []byte { + return (*(*[SizeofXfrmReplayState]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/proc_event_linux.go b/vendor/github.com/vishvananda/netlink/proc_event_linux.go new file mode 100644 index 00000000000..ac8762bd82e --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/proc_event_linux.go @@ -0,0 +1,208 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "fmt" + "os" + "syscall" + + "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" +) + +const CN_IDX_PROC = 0x1 + +const ( + PROC_EVENT_NONE = 0x00000000 + PROC_EVENT_FORK = 0x00000001 + PROC_EVENT_EXEC = 0x00000002 + PROC_EVENT_UID = 0x00000004 + PROC_EVENT_GID = 0x00000040 + PROC_EVENT_SID = 0x00000080 + PROC_EVENT_PTRACE = 0x00000100 + PROC_EVENT_COMM = 0x00000200 + PROC_EVENT_COREDUMP = 0x40000000 + PROC_EVENT_EXIT = 0x80000000 +) + +const ( + CN_VAL_PROC = 0x1 + PROC_CN_MCAST_LISTEN = 0x1 +) + +type ProcEventMsg interface { + Pid() uint32 + Tgid() uint32 +} + +type ProcEventHeader struct { + What uint32 + CPU uint32 + Timestamp uint64 +} + +type ProcEvent struct { + ProcEventHeader + Msg ProcEventMsg +} + +func (pe *ProcEvent) setHeader(h ProcEventHeader) { + pe.What = h.What + pe.CPU = h.CPU + pe.Timestamp = h.Timestamp +} + +type ExitProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 + ExitCode uint32 + ExitSignal uint32 + ParentPid uint32 + ParentTgid uint32 +} + +func (e *ExitProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *ExitProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +type ExecProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 +} + +func (e *ExecProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *ExecProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +type ForkProcEvent struct { + ParentPid uint32 + ParentTgid uint32 + ChildPid uint32 + ChildTgid uint32 +} + +func (e *ForkProcEvent) Pid() uint32 { + return e.ParentPid +} + +func (e *ForkProcEvent) Tgid() uint32 { + return e.ParentTgid +} + +type CommProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 + Comm [16]byte +} + +func (e *CommProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *CommProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +func ProcEventMonitor(ch chan<- ProcEvent, done <-chan struct{}, errorChan chan<- error) error { + h, err := NewHandle() + if err != nil { + return err + } + defer h.Delete() + + s, err := nl.SubscribeAt(netns.None(), netns.None(), unix.NETLINK_CONNECTOR, CN_IDX_PROC) + if err != nil { + return err + } + + var nlmsg nl.NetlinkRequest + + nlmsg.Pid = uint32(os.Getpid()) + nlmsg.Type = unix.NLMSG_DONE + nlmsg.Len = uint32(unix.SizeofNlMsghdr) + + cm := nl.NewCnMsg(CN_IDX_PROC, CN_VAL_PROC, PROC_CN_MCAST_LISTEN) + nlmsg.AddData(cm) + + s.Send(&nlmsg) + + if done != nil { + go func() { + <-done + s.Close() + }() + } + + go func() { + defer close(ch) + for { + msgs, from, err := s.Receive() + if err != nil { + errorChan <- err + return + } + if from.Pid != nl.PidKernel { + errorChan <- fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + return + } + + for _, m := range msgs { + e := parseNetlinkMessage(m) + if e != nil { + ch <- *e + } + } + + } + }() + + return nil +} + +func parseNetlinkMessage(m syscall.NetlinkMessage) *ProcEvent { + if m.Header.Type == unix.NLMSG_DONE { + buf := bytes.NewBuffer(m.Data) + msg := &nl.CnMsg{} + hdr := &ProcEventHeader{} + binary.Read(buf, nl.NativeEndian(), msg) + binary.Read(buf, nl.NativeEndian(), hdr) + + pe := &ProcEvent{} + pe.setHeader(*hdr) + switch hdr.What { + case PROC_EVENT_EXIT: + event := &ExitProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_FORK: + event := &ForkProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_EXEC: + event := &ExecProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_COMM: + event := &CommProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + } + return nil + } + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/protinfo.go b/vendor/github.com/vishvananda/netlink/protinfo.go index 60b23b3742c..0163cba3a8b 100644 --- a/vendor/github.com/vishvananda/netlink/protinfo.go +++ b/vendor/github.com/vishvananda/netlink/protinfo.go @@ -6,14 +6,16 @@ import ( // Protinfo represents bridge flags from netlink. type Protinfo struct { - Hairpin bool - Guard bool - FastLeave bool - RootBlock bool - Learning bool - Flood bool - ProxyArp bool - ProxyArpWiFi bool + Hairpin bool + Guard bool + FastLeave bool + RootBlock bool + Learning bool + Flood bool + ProxyArp bool + ProxyArpWiFi bool + Isolated bool + NeighSuppress bool } // String returns a list of enabled flags @@ -47,6 +49,12 @@ func (prot *Protinfo) String() string { if prot.ProxyArpWiFi { boolStrings = append(boolStrings, "ProxyArpWiFi") } + if prot.Isolated { + boolStrings = append(boolStrings, "Isolated") + } + if prot.NeighSuppress { + boolStrings = append(boolStrings, "NeighSuppress") + } return strings.Join(boolStrings, " ") } diff --git a/vendor/github.com/vishvananda/netlink/protinfo_linux.go b/vendor/github.com/vishvananda/netlink/protinfo_linux.go index 15b65123cef..1ba25d3cd47 100644 --- a/vendor/github.com/vishvananda/netlink/protinfo_linux.go +++ b/vendor/github.com/vishvananda/netlink/protinfo_linux.go @@ -68,6 +68,10 @@ func parseProtinfo(infos []syscall.NetlinkRouteAttr) (pi Protinfo) { pi.ProxyArp = byteToBool(info.Value[0]) case nl.IFLA_BRPORT_PROXYARP_WIFI: pi.ProxyArpWiFi = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_ISOLATED: + pi.Isolated = byteToBool(info.Value[0]) + case nl.IFLA_BRPORT_NEIGH_SUPPRESS: + pi.NeighSuppress = byteToBool(info.Value[0]) } } return diff --git a/vendor/github.com/vishvananda/netlink/qdisc.go b/vendor/github.com/vishvananda/netlink/qdisc.go index af78305ac26..067743d390d 100644 --- a/vendor/github.com/vishvananda/netlink/qdisc.go +++ b/vendor/github.com/vishvananda/netlink/qdisc.go @@ -17,19 +17,29 @@ const ( HANDLE_MIN_EGRESS = 0xFFFFFFF3 ) +const ( + HORIZON_DROP_POLICY_CAP = 0 + HORIZON_DROP_POLICY_DROP = 1 + HORIZON_DROP_POLICY_DEFAULT = 255 +) + type Qdisc interface { Attrs() *QdiscAttrs Type() string } +type QdiscStatistics ClassStatistics + // QdiscAttrs represents a netlink qdisc. A qdisc is associated with a link, // has a handle, a parent and a refcnt. The root qdisc of a device should // have parent == HANDLE_ROOT. type QdiscAttrs struct { - LinkIndex int - Handle uint32 - Parent uint32 - Refcnt uint32 // read only + LinkIndex int + Handle uint32 + Parent uint32 + Refcnt uint32 // read only + IngressBlock *uint32 + Statistics *QdiscStatistics } func (q QdiscAttrs) String() string { @@ -113,6 +123,7 @@ type Htb struct { Defcls uint32 Debug uint32 DirectPkts uint32 + DirectQlen *uint32 } func NewHtb(attrs QdiscAttrs) *Htb { @@ -123,6 +134,7 @@ func NewHtb(attrs QdiscAttrs) *Htb { Rate2Quantum: 10, Debug: 0, DirectPkts: 0, + DirectQlen: nil, } } @@ -150,6 +162,7 @@ type NetemQdiscAttrs struct { ReorderCorr float32 // in % CorruptProb float32 // in % CorruptCorr float32 // in % + Rate64 uint64 } func (q NetemQdiscAttrs) String() string { @@ -174,6 +187,7 @@ type Netem struct { ReorderCorr uint32 CorruptProb uint32 CorruptCorr uint32 + Rate64 uint64 } func (netem *Netem) String() string { @@ -210,6 +224,19 @@ func (qdisc *Tbf) Type() string { return "tbf" } +// Clsact is a qdisc for adding filters +type Clsact struct { + QdiscAttrs +} + +func (qdisc *Clsact) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Clsact) Type() string { + return "clsact" +} + // Ingress is a qdisc for adding ingress filters type Ingress struct { QdiscAttrs @@ -278,22 +305,25 @@ type Fq struct { FlowDefaultRate uint32 FlowMaxRate uint32 // called BucketsLog under the hood - Buckets uint32 - FlowRefillDelay uint32 - LowRateThreshold uint32 + Buckets uint32 + FlowRefillDelay uint32 + LowRateThreshold uint32 + Horizon uint32 + HorizonDropPolicy uint8 } func (fq *Fq) String() string { return fmt.Sprintf( - "{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v, LowRateThreshold: %v}", - fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold, + "{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v, LowRateThreshold: %v, Horizon: %v, HorizonDropPolicy: %v}", + fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold, fq.Horizon, fq.HorizonDropPolicy, ) } func NewFq(attrs QdiscAttrs) *Fq { return &Fq{ - QdiscAttrs: attrs, - Pacing: 1, + QdiscAttrs: attrs, + Pacing: 1, + HorizonDropPolicy: HORIZON_DROP_POLICY_DEFAULT, } } @@ -308,13 +338,15 @@ func (qdisc *Fq) Type() string { // FQ_Codel (Fair Queuing Controlled Delay) is queuing discipline that combines Fair Queuing with the CoDel AQM scheme. type FqCodel struct { QdiscAttrs - Target uint32 - Limit uint32 - Interval uint32 - ECN uint32 - Flows uint32 - Quantum uint32 - // There are some more attributes here, but support for them seems not ubiquitous + Target uint32 + Limit uint32 + Interval uint32 + ECN uint32 + Flows uint32 + Quantum uint32 + CEThreshold uint32 + DropBatchSize uint32 + MemoryLimit uint32 } func (fqcodel *FqCodel) String() string { @@ -338,3 +370,27 @@ func (qdisc *FqCodel) Attrs() *QdiscAttrs { func (qdisc *FqCodel) Type() string { return "fq_codel" } + +type Sfq struct { + QdiscAttrs + // TODO: Only the simplified options for SFQ are handled here. Support for the extended one can be added later. + Quantum uint8 + Perturb uint8 + Limit uint32 + Divisor uint8 +} + +func (sfq *Sfq) String() string { + return fmt.Sprintf( + "{%v -- Quantum: %v, Perturb: %v, Limit: %v, Divisor: %v}", + sfq.Attrs(), sfq.Quantum, sfq.Perturb, sfq.Limit, sfq.Divisor, + ) +} + +func (qdisc *Sfq) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Sfq) Type() string { + return "sfq" +} diff --git a/vendor/github.com/vishvananda/netlink/qdisc_linux.go b/vendor/github.com/vishvananda/netlink/qdisc_linux.go index e9eee59084f..e732ae3bd64 100644 --- a/vendor/github.com/vishvananda/netlink/qdisc_linux.go +++ b/vendor/github.com/vishvananda/netlink/qdisc_linux.go @@ -5,6 +5,7 @@ import ( "io/ioutil" "strconv" "strings" + "sync" "syscall" "github.com/vishvananda/netlink/nl" @@ -17,6 +18,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem { var lossCorr, delayCorr, duplicateCorr uint32 var reorderProb, reorderCorr uint32 var corruptProb, corruptCorr uint32 + var rate64 uint64 latency := nattrs.Latency loss := Percentage2u32(nattrs.Loss) @@ -57,6 +59,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem { corruptProb = Percentage2u32(nattrs.CorruptProb) corruptCorr = Percentage2u32(nattrs.CorruptCorr) + rate64 = nattrs.Rate64 return &Netem{ QdiscAttrs: attrs, @@ -73,6 +76,7 @@ func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem { ReorderCorr: reorderCorr, CorruptProb: corruptProb, CorruptCorr: corruptCorr, + Rate64: rate64, } } @@ -159,6 +163,9 @@ func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error { func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) + if qdisc.Attrs().IngressBlock != nil { + req.AddData(nl.NewRtAttr(nl.TCA_INGRESS_BLOCK, nl.Uint32Attr(*qdisc.Attrs().IngressBlock))) + } options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) @@ -194,7 +201,9 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { opt.Debug = qdisc.Debug opt.DirectPkts = qdisc.DirectPkts options.AddRtAttr(nl.TCA_HTB_INIT, opt.Serialize()) - // options.AddRtAttr(nl.TCA_HTB_DIRECT_QLEN, opt.Serialize()) + if qdisc.DirectQlen != nil { + options.AddRtAttr(nl.TCA_HTB_DIRECT_QLEN, nl.Uint32Attr(*qdisc.DirectQlen)) + } case *Hfsc: opt := nl.TcHfscOpt{} opt.Defcls = qdisc.Defcls @@ -231,6 +240,19 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { if reorder.Probability > 0 { options.AddRtAttr(nl.TCA_NETEM_REORDER, reorder.Serialize()) } + // Rate + if qdisc.Rate64 > 0 { + rate := nl.TcNetemRate{} + if qdisc.Rate64 >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_NETEM_RATE64, nl.Uint64Attr(qdisc.Rate64)) + rate.Rate = ^uint32(0) + } else { + rate.Rate = uint32(qdisc.Rate64) + } + options.AddRtAttr(nl.TCA_NETEM_RATE, rate.Serialize()) + } + case *Clsact: + options = nil case *Ingress: // ingress filters must use the proper handle if qdisc.Attrs().Parent != HANDLE_INGRESS { @@ -250,13 +272,24 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { if qdisc.Quantum > 0 { options.AddRtAttr(nl.TCA_FQ_CODEL_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum)))) } - + if qdisc.CEThreshold > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_CE_THRESHOLD, nl.Uint32Attr(qdisc.CEThreshold)) + } + if qdisc.DropBatchSize > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_DROP_BATCH_SIZE, nl.Uint32Attr(qdisc.DropBatchSize)) + } + if qdisc.MemoryLimit > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_MEMORY_LIMIT, nl.Uint32Attr(qdisc.MemoryLimit)) + } case *Fq: options.AddRtAttr(nl.TCA_FQ_RATE_ENABLE, nl.Uint32Attr((uint32(qdisc.Pacing)))) if qdisc.Buckets > 0 { options.AddRtAttr(nl.TCA_FQ_BUCKETS_LOG, nl.Uint32Attr((uint32(qdisc.Buckets)))) } + if qdisc.PacketLimit > 0 { + options.AddRtAttr(nl.TCA_FQ_PLIMIT, nl.Uint32Attr((uint32(qdisc.PacketLimit)))) + } if qdisc.LowRateThreshold > 0 { options.AddRtAttr(nl.TCA_FQ_LOW_RATE_THRESHOLD, nl.Uint32Attr((uint32(qdisc.LowRateThreshold)))) } @@ -278,6 +311,20 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { if qdisc.FlowDefaultRate > 0 { options.AddRtAttr(nl.TCA_FQ_FLOW_DEFAULT_RATE, nl.Uint32Attr((uint32(qdisc.FlowDefaultRate)))) } + if qdisc.Horizon > 0 { + options.AddRtAttr(nl.TCA_FQ_HORIZON, nl.Uint32Attr(qdisc.Horizon)) + } + if qdisc.HorizonDropPolicy != HORIZON_DROP_POLICY_DEFAULT { + options.AddRtAttr(nl.TCA_FQ_HORIZON_DROP, nl.Uint8Attr(qdisc.HorizonDropPolicy)) + } + case *Sfq: + opt := nl.TcSfqQoptV1{} + opt.TcSfqQopt.Quantum = qdisc.Quantum + opt.TcSfqQopt.Perturb = int32(qdisc.Perturb) + opt.TcSfqQopt.Limit = qdisc.Limit + opt.TcSfqQopt.Divisor = qdisc.Divisor + + options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize()) default: options = nil } @@ -362,6 +409,10 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { qdisc = &FqCodel{} case "netem": qdisc = &Netem{} + case "sfq": + qdisc = &Sfq{} + case "clsact": + qdisc = &Clsact{} default: qdisc = &GenericQdisc{QdiscType: qdiscType} } @@ -417,9 +468,29 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { if err := parseNetemData(qdisc, attr.Value); err != nil { return nil, err } + case "sfq": + if err := parseSfqData(qdisc, attr.Value); err != nil { + return nil, err + } // no options for ingress } + case nl.TCA_INGRESS_BLOCK: + ingressBlock := new(uint32) + *ingressBlock = native.Uint32(attr.Value) + base.IngressBlock = ingressBlock + case nl.TCA_STATS: + s, err := parseTcStats(attr.Value) + if err != nil { + return nil, err + } + base.Statistics = (*QdiscStatistics)(s) + case nl.TCA_STATS2: + s, err := parseTcStats2(attr.Value) + if err != nil { + return nil, err + } + base.Statistics = (*QdiscStatistics)(s) } } *qdisc.Attrs() = base @@ -446,7 +517,6 @@ func parsePrioData(qdisc Qdisc, value []byte) error { } func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { - native = nl.NativeEndian() htb := qdisc.(*Htb) for _, datum := range data { switch datum.Attr.Type { @@ -458,15 +528,14 @@ func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { htb.Debug = opt.Debug htb.DirectPkts = opt.DirectPkts case nl.TCA_HTB_DIRECT_QLEN: - // TODO - //htb.DirectQlen = native.uint32(datum.Value) + directQlen := native.Uint32(datum.Value) + htb.DirectQlen = &directQlen } } return nil } func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { - native = nl.NativeEndian() fqCodel := qdisc.(*FqCodel) for _, datum := range data { @@ -483,6 +552,12 @@ func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { fqCodel.Flows = native.Uint32(datum.Value) case nl.TCA_FQ_CODEL_QUANTUM: fqCodel.Quantum = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_CE_THRESHOLD: + fqCodel.CEThreshold = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_DROP_BATCH_SIZE: + fqCodel.DropBatchSize = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_MEMORY_LIMIT: + fqCodel.MemoryLimit = native.Uint32(datum.Value) } } return nil @@ -490,13 +565,11 @@ func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { func parseHfscData(qdisc Qdisc, data []byte) error { Hfsc := qdisc.(*Hfsc) - native = nl.NativeEndian() Hfsc.Defcls = native.Uint16(data) return nil } func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { - native = nl.NativeEndian() fq := qdisc.(*Fq) for _, datum := range data { switch datum.Attr.Type { @@ -522,6 +595,11 @@ func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { fq.FlowMaxRate = native.Uint32(datum.Value) case nl.TCA_FQ_FLOW_DEFAULT_RATE: fq.FlowDefaultRate = native.Uint32(datum.Value) + case nl.TCA_FQ_HORIZON: + fq.Horizon = native.Uint32(datum.Value) + case nl.TCA_FQ_HORIZON_DROP: + fq.HorizonDropPolicy = datum.Value[0] + } } return nil @@ -540,6 +618,8 @@ func parseNetemData(qdisc Qdisc, value []byte) error { if err != nil { return err } + var rate *nl.TcNetemRate + var rate64 uint64 for _, datum := range data { switch datum.Attr.Type { case nl.TCA_NETEM_CORR: @@ -555,13 +635,23 @@ func parseNetemData(qdisc Qdisc, value []byte) error { opt := nl.DeserializeTcNetemReorder(datum.Value) netem.ReorderProb = opt.Probability netem.ReorderCorr = opt.Correlation + case nl.TCA_NETEM_RATE: + rate = nl.DeserializeTcNetemRate(datum.Value) + case nl.TCA_NETEM_RATE64: + rate64 = native.Uint64(datum.Value) } } + if rate != nil { + netem.Rate64 = uint64(rate.Rate) + if rate64 > 0 { + netem.Rate64 = rate64 + } + } + return nil } func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { - native = nl.NativeEndian() tbf := qdisc.(*Tbf) for _, datum := range data { switch datum.Attr.Type { @@ -582,6 +672,17 @@ func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { return nil } +func parseSfqData(qdisc Qdisc, value []byte) error { + sfq := qdisc.(*Sfq) + opt := nl.DeserializeTcSfqQoptV1(value) + sfq.Quantum = opt.TcSfqQopt.Quantum + sfq.Perturb = uint8(opt.TcSfqQopt.Perturb) + sfq.Limit = opt.TcSfqQopt.Limit + sfq.Divisor = opt.TcSfqQopt.Divisor + + return nil +} + const ( TIME_UNITS_PER_SEC = 1000000 ) @@ -590,6 +691,9 @@ var ( tickInUsec float64 clockFactor float64 hz float64 + + // Without this, the go race detector may report races. + initClockMutex sync.Mutex ) func initClock() { @@ -598,10 +702,10 @@ func initClock() { return } parts := strings.Split(strings.TrimSpace(string(data)), " ") - if len(parts) < 3 { + if len(parts) < 4 { return } - var vals [3]uint64 + var vals [4]uint64 for i := range vals { val, err := strconv.ParseUint(parts[i], 16, 32) if err != nil { @@ -615,10 +719,17 @@ func initClock() { } clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor - hz = float64(vals[0]) + if vals[2] == 1000000 { + // ref https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/lib/utils.c#n963 + hz = float64(vals[3]) + } else { + hz = 100 + } } func TickInUsec() float64 { + initClockMutex.Lock() + defer initClockMutex.Unlock() if tickInUsec == 0.0 { initClock() } @@ -626,6 +737,8 @@ func TickInUsec() float64 { } func ClockFactor() float64 { + initClockMutex.Lock() + defer initClockMutex.Unlock() if clockFactor == 0.0 { initClock() } @@ -633,6 +746,8 @@ func ClockFactor() float64 { } func Hz() float64 { + initClockMutex.Lock() + defer initClockMutex.Unlock() if hz == 0.0 { initClock() } @@ -663,6 +778,11 @@ func latency(rate uint64, limit, buffer uint32) float64 { return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer)) } -func Xmittime(rate uint64, size uint32) float64 { - return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate)) +func Xmittime(rate uint64, size uint32) uint32 { + // https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/tc/tc_core.c#n62 + return time2Tick(uint32(TIME_UNITS_PER_SEC * (float64(size) / float64(rate)))) +} + +func Xmitsize(rate uint64, ticks uint32) uint32 { + return uint32((float64(rate) * float64(tick2Time(ticks))) / TIME_UNITS_PER_SEC) } diff --git a/vendor/github.com/vishvananda/netlink/rdma_link_linux.go b/vendor/github.com/vishvananda/netlink/rdma_link_linux.go index 2d0bdc8c36f..036399db6b0 100644 --- a/vendor/github.com/vishvananda/netlink/rdma_link_linux.go +++ b/vendor/github.com/vishvananda/netlink/rdma_link_linux.go @@ -77,28 +77,39 @@ func executeOneGetRdmaLink(data []byte) (*RdmaLink, error) { return &link, nil } -func execRdmaGetLink(req *nl.NetlinkRequest, name string) (*RdmaLink, error) { +func execRdmaSetLink(req *nl.NetlinkRequest) error { + + _, err := req.Execute(unix.NETLINK_RDMA, 0) + return err +} + +// RdmaLinkList gets a list of RDMA link devices. +// Equivalent to: `rdma dev show` +func RdmaLinkList() ([]*RdmaLink, error) { + return pkgHandle.RdmaLinkList() +} + +// RdmaLinkList gets a list of RDMA link devices. +// Equivalent to: `rdma dev show` +func (h *Handle) RdmaLinkList() ([]*RdmaLink, error) { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_GET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP) msgs, err := req.Execute(unix.NETLINK_RDMA, 0) if err != nil { return nil, err } + + var res []*RdmaLink for _, m := range msgs { link, err := executeOneGetRdmaLink(m) if err != nil { return nil, err } - if link.Attrs.Name == name { - return link, nil - } + res = append(res, link) } - return nil, fmt.Errorf("Rdma device %v not found", name) -} - -func execRdmaSetLink(req *nl.NetlinkRequest) error { - _, err := req.Execute(unix.NETLINK_RDMA, 0) - return err + return res, nil } // RdmaLinkByName finds a link by name and returns a pointer to the object if @@ -110,11 +121,16 @@ func RdmaLinkByName(name string) (*RdmaLink, error) { // RdmaLinkByName finds a link by name and returns a pointer to the object if // found and nil error, otherwise returns error code. func (h *Handle) RdmaLinkByName(name string) (*RdmaLink, error) { - - proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_GET) - req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP) - - return execRdmaGetLink(req, name) + links, err := h.RdmaLinkList() + if err != nil { + return nil, err + } + for _, link := range links { + if link.Attrs.Name == name { + return link, nil + } + } + return nil, fmt.Errorf("Rdma device %v not found", name) } // RdmaLinkSetName sets the name of the rdma link device. Return nil on success @@ -262,3 +278,54 @@ func (h *Handle) RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error { return execRdmaSetLink(req) } + +// RdmaLinkDel deletes an rdma link +// +// Similar to: rdma link delete NAME +// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html +func RdmaLinkDel(name string) error { + return pkgHandle.RdmaLinkDel(name) +} + +// RdmaLinkDel deletes an rdma link. +func (h *Handle) RdmaLinkDel(name string) error { + link, err := h.RdmaLinkByName(name) + if err != nil { + return err + } + + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_DELLINK) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + b := make([]byte, 4) + native.PutUint32(b, link.Attrs.Index) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b)) + + _, err = req.Execute(unix.NETLINK_RDMA, 0) + return err +} + +// RdmaLinkAdd adds an rdma link for the specified type to the network device. +// Similar to: rdma link add NAME type TYPE netdev NETDEV +// NAME - specifies the new name of the rdma link to add +// TYPE - specifies which rdma type to use. Link types: +// rxe - Soft RoCE driver +// siw - Soft iWARP driver +// NETDEV - specifies the network device to which the link is bound +// +// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html +func RdmaLinkAdd(linkName, linkType, netdev string) error { + return pkgHandle.RdmaLinkAdd(linkName, linkType, netdev) +} + +// RdmaLinkAdd adds an rdma link for the specified type to the network device. +func (h *Handle) RdmaLinkAdd(linkName string, linkType string, netdev string) error { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_NEWLINK) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_NAME, nl.ZeroTerminated(linkName))) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_LINK_TYPE, nl.ZeroTerminated(linkType))) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_NDEV_NAME, nl.ZeroTerminated(netdev))) + _, err := req.Execute(unix.NETLINK_RDMA, 0) + return err +} diff --git a/vendor/github.com/vishvananda/netlink/route.go b/vendor/github.com/vishvananda/netlink/route.go index 58ff1af600f..1b4555d5c51 100644 --- a/vendor/github.com/vishvananda/netlink/route.go +++ b/vendor/github.com/vishvananda/netlink/route.go @@ -11,6 +11,24 @@ type Scope uint8 type NextHopFlag int +const ( + RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota) + RT_FILTER_SCOPE + RT_FILTER_TYPE + RT_FILTER_TOS + RT_FILTER_IIF + RT_FILTER_OIF + RT_FILTER_DST + RT_FILTER_SRC + RT_FILTER_GW + RT_FILTER_TABLE + RT_FILTER_HOPLIMIT + RT_FILTER_PRIORITY + RT_FILTER_MARK + RT_FILTER_MASK + RT_FILTER_REALM +) + type Destination interface { Family() int Decode([]byte) error @@ -27,27 +45,46 @@ type Encap interface { Equal(Encap) bool } +//Protocol describe what was the originator of the route +type RouteProtocol int + // Route represents a netlink route. type Route struct { - LinkIndex int - ILinkIndex int - Scope Scope - Dst *net.IPNet - Src net.IP - Gw net.IP - MultiPath []*NexthopInfo - Protocol int - Priority int - Table int - Type int - Tos int - Flags int - MPLSDst *int - NewDst Destination - Encap Encap - MTU int - AdvMSS int - Hoplimit int + LinkIndex int + ILinkIndex int + Scope Scope + Dst *net.IPNet + Src net.IP + Gw net.IP + MultiPath []*NexthopInfo + Protocol RouteProtocol + Priority int + Family int + Table int + Type int + Tos int + Flags int + MPLSDst *int + NewDst Destination + Encap Encap + Via Destination + Realm int + MTU int + Window int + Rtt int + RttVar int + Ssthresh int + Cwnd int + AdvMSS int + Reordering int + Hoplimit int + InitCwnd int + Features int + RtoMin int + InitRwnd int + QuickACK int + Congctl string + FastOpenNoCookie int } func (r Route) String() string { @@ -66,6 +103,9 @@ func (r Route) String() string { if r.Encap != nil { elems = append(elems, fmt.Sprintf("Encap: %s", r.Encap)) } + if r.Via != nil { + elems = append(elems, fmt.Sprintf("Via: %s", r.Via)) + } elems = append(elems, fmt.Sprintf("Src: %s", r.Src)) if len(r.MultiPath) > 0 { elems = append(elems, fmt.Sprintf("Gw: %s", r.MultiPath)) @@ -74,6 +114,7 @@ func (r Route) String() string { } elems = append(elems, fmt.Sprintf("Flags: %s", r.ListFlags())) elems = append(elems, fmt.Sprintf("Table: %d", r.Table)) + elems = append(elems, fmt.Sprintf("Realm: %d", r.Realm)) return fmt.Sprintf("{%s}", strings.Join(elems, " ")) } @@ -87,6 +128,7 @@ func (r Route) Equal(x Route) bool { nexthopInfoSlice(r.MultiPath).Equal(x.MultiPath) && r.Protocol == x.Protocol && r.Priority == x.Priority && + r.Realm == x.Realm && r.Table == x.Table && r.Type == x.Type && r.Tos == x.Tos && @@ -94,6 +136,7 @@ func (r Route) Equal(x Route) bool { r.Flags == x.Flags && (r.MPLSDst == x.MPLSDst || (r.MPLSDst != nil && x.MPLSDst != nil && *r.MPLSDst == *x.MPLSDst)) && (r.NewDst == x.NewDst || (r.NewDst != nil && r.NewDst.Equal(x.NewDst))) && + (r.Via == x.Via || (r.Via != nil && r.Via.Equal(x.Via))) && (r.Encap == x.Encap || (r.Encap != nil && r.Encap.Equal(x.Encap))) } @@ -111,8 +154,15 @@ type flagString struct { } // RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE + +// NlFlags is only non-zero for RTM_NEWROUTE, the following flags can be set: +// - unix.NLM_F_REPLACE - Replace existing matching config object with this request +// - unix.NLM_F_EXCL - Don't replace the config object if it already exists +// - unix.NLM_F_CREATE - Create config object if it doesn't already exist +// - unix.NLM_F_APPEND - Add to the end of the object list type RouteUpdate struct { - Type uint16 + Type uint16 + NlFlags uint16 Route } @@ -123,6 +173,7 @@ type NexthopInfo struct { Flags int NewDst Destination Encap Encap + Via Destination } func (n *NexthopInfo) String() string { @@ -134,6 +185,9 @@ func (n *NexthopInfo) String() string { if n.Encap != nil { elems = append(elems, fmt.Sprintf("Encap: %s", n.Encap)) } + if n.Via != nil { + elems = append(elems, fmt.Sprintf("Via: %s", n.Via)) + } elems = append(elems, fmt.Sprintf("Weight: %d", n.Hops+1)) elems = append(elems, fmt.Sprintf("Gw: %s", n.Gw)) elems = append(elems, fmt.Sprintf("Flags: %s", n.ListFlags())) diff --git a/vendor/github.com/vishvananda/netlink/route_linux.go b/vendor/github.com/vishvananda/netlink/route_linux.go index c69c595ed3c..0cd4f8363a7 100644 --- a/vendor/github.com/vishvananda/netlink/route_linux.go +++ b/vendor/github.com/vishvananda/netlink/route_linux.go @@ -1,8 +1,11 @@ package netlink import ( + "bytes" + "encoding/binary" "fmt" "net" + "strconv" "strings" "syscall" @@ -21,19 +24,22 @@ const ( SCOPE_NOWHERE Scope = unix.RT_SCOPE_NOWHERE ) -const ( - RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota) - RT_FILTER_SCOPE - RT_FILTER_TYPE - RT_FILTER_TOS - RT_FILTER_IIF - RT_FILTER_OIF - RT_FILTER_DST - RT_FILTER_SRC - RT_FILTER_GW - RT_FILTER_TABLE - RT_FILTER_HOPLIMIT -) +func (s Scope) String() string { + switch s { + case SCOPE_UNIVERSE: + return "universe" + case SCOPE_SITE: + return "site" + case SCOPE_LINK: + return "link" + case SCOPE_HOST: + return "host" + case SCOPE_NOWHERE: + return "nowhere" + default: + return "unknown" + } +} const ( FLAG_ONLINK NextHopFlag = unix.RTNH_F_ONLINK @@ -128,7 +134,6 @@ func (e *MPLSEncap) Decode(buf []byte) error { if len(buf) < 4 { return fmt.Errorf("lack of bytes") } - native := nl.NativeEndian() l := native.Uint16(buf) if len(buf) < int(l) { return fmt.Errorf("lack of bytes") @@ -144,7 +149,6 @@ func (e *MPLSEncap) Decode(buf []byte) error { func (e *MPLSEncap) Encode() ([]byte, error) { s := nl.EncodeMPLSStack(e.Labels...) - native := nl.NativeEndian() hdr := make([]byte, 4) native.PutUint16(hdr, uint16(len(s)+4)) native.PutUint16(hdr[2:], nl.MPLS_IPTUNNEL_DST) @@ -200,7 +204,6 @@ func (e *SEG6Encap) Decode(buf []byte) error { if len(buf) < 4 { return fmt.Errorf("lack of bytes") } - native := nl.NativeEndian() // Get Length(l) & Type(typ) : 2 + 2 bytes l := native.Uint16(buf) if len(buf) < int(l) { @@ -220,7 +223,6 @@ func (e *SEG6Encap) Decode(buf []byte) error { } func (e *SEG6Encap) Encode() ([]byte, error) { s, err := nl.EncodeSEG6Encap(e.Mode, e.Segments) - native := nl.NativeEndian() hdr := make([]byte, 4) native.PutUint16(hdr, uint16(len(s)+4)) native.PutUint16(hdr[2:], nl.SEG6_IPTUNNEL_SRH) @@ -230,7 +232,7 @@ func (e *SEG6Encap) String() string { segs := make([]string, 0, len(e.Segments)) // append segment backwards (from n to 0) since seg#0 is the last segment. for i := len(e.Segments); i > 0; i-- { - segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1])) + segs = append(segs, e.Segments[i-1].String()) } str := fmt.Sprintf("mode %s segs %d [ %s ]", nl.SEG6EncapModeString(e.Mode), len(e.Segments), strings.Join(segs, " ")) @@ -271,6 +273,16 @@ type SEG6LocalEncap struct { In6Addr net.IP Iif int Oif int + bpf bpfObj +} + +func (e *SEG6LocalEncap) SetProg(progFd int, progName string) error { + if progFd <= 0 { + return fmt.Errorf("seg6local bpf SetProg: invalid fd") + } + e.bpf.progFd = progFd + e.bpf.progName = progName + return nil } func (e *SEG6LocalEncap) Type() int { @@ -281,7 +293,6 @@ func (e *SEG6LocalEncap) Decode(buf []byte) error { if err != nil { return err } - native := nl.NativeEndian() for _, attr := range attrs { switch attr.Attr.Type { case nl.SEG6_LOCAL_ACTION: @@ -305,13 +316,28 @@ func (e *SEG6LocalEncap) Decode(buf []byte) error { case nl.SEG6_LOCAL_OIF: e.Oif = int(native.Uint32(attr.Value[0:4])) e.Flags[nl.SEG6_LOCAL_OIF] = true + case nl.SEG6_LOCAL_BPF: + var bpfAttrs []syscall.NetlinkRouteAttr + bpfAttrs, err = nl.ParseRouteAttr(attr.Value) + bpfobj := bpfObj{} + for _, bpfAttr := range bpfAttrs { + switch bpfAttr.Attr.Type { + case nl.LWT_BPF_PROG_FD: + bpfobj.progFd = int(native.Uint32(bpfAttr.Value)) + case nl.LWT_BPF_PROG_NAME: + bpfobj.progName = string(bpfAttr.Value) + default: + err = fmt.Errorf("seg6local bpf decode: unknown attribute: Type %d", bpfAttr.Attr) + } + } + e.bpf = bpfobj + e.Flags[nl.SEG6_LOCAL_BPF] = true } } return err } func (e *SEG6LocalEncap) Encode() ([]byte, error) { var err error - native := nl.NativeEndian() res := make([]byte, 8) native.PutUint16(res, 8) // length native.PutUint16(res[2:], nl.SEG6_LOCAL_ACTION) @@ -367,6 +393,16 @@ func (e *SEG6LocalEncap) Encode() ([]byte, error) { native.PutUint32(attr[4:], uint32(e.Oif)) res = append(res, attr...) } + if e.Flags[nl.SEG6_LOCAL_BPF] { + attr := nl.NewRtAttr(nl.SEG6_LOCAL_BPF, []byte{}) + if e.bpf.progFd != 0 { + attr.AddRtAttr(nl.LWT_BPF_PROG_FD, nl.Uint32Attr(uint32(e.bpf.progFd))) + } + if e.bpf.progName != "" { + attr.AddRtAttr(nl.LWT_BPF_PROG_NAME, nl.ZeroTerminated(e.bpf.progName)) + } + res = append(res, attr.Serialize()...) + } return res, err } func (e *SEG6LocalEncap) String() string { @@ -400,12 +436,15 @@ func (e *SEG6LocalEncap) String() string { } if e.Flags[nl.SEG6_LOCAL_SRH] { segs := make([]string, 0, len(e.Segments)) - //append segment backwards (from n to 0) since seg#0 is the last segment. + // append segment backwards (from n to 0) since seg#0 is the last segment. for i := len(e.Segments); i > 0; i-- { - segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1])) + segs = append(segs, e.Segments[i-1].String()) } strs = append(strs, fmt.Sprintf("segs %d [ %s ]", len(e.Segments), strings.Join(segs, " "))) } + if e.Flags[nl.SEG6_LOCAL_BPF] { + strs = append(strs, fmt.Sprintf("bpf %s[%d]", e.bpf.progName, e.bpf.progFd)) + } return strings.Join(strs, " ") } func (e *SEG6LocalEncap) Equal(x Encap) bool { @@ -437,12 +476,316 @@ func (e *SEG6LocalEncap) Equal(x Encap) bool { if !e.InAddr.Equal(o.InAddr) || !e.In6Addr.Equal(o.In6Addr) { return false } - if e.Action != o.Action || e.Table != o.Table || e.Iif != o.Iif || e.Oif != o.Oif { + if e.Action != o.Action || e.Table != o.Table || e.Iif != o.Iif || e.Oif != o.Oif || e.bpf != o.bpf { + return false + } + return true +} + +// Encap BPF definitions +type bpfObj struct { + progFd int + progName string +} +type BpfEncap struct { + progs [nl.LWT_BPF_MAX]bpfObj + headroom int +} + +// SetProg adds a bpf function to the route via netlink RTA_ENCAP. The fd must be a bpf +// program loaded with bpf(type=BPF_PROG_TYPE_LWT_*) matching the direction the program should +// be applied to (LWT_BPF_IN, LWT_BPF_OUT, LWT_BPF_XMIT). +func (e *BpfEncap) SetProg(mode, progFd int, progName string) error { + if progFd <= 0 { + return fmt.Errorf("lwt bpf SetProg: invalid fd") + } + if mode <= nl.LWT_BPF_UNSPEC || mode >= nl.LWT_BPF_XMIT_HEADROOM { + return fmt.Errorf("lwt bpf SetProg:invalid mode") + } + e.progs[mode].progFd = progFd + e.progs[mode].progName = fmt.Sprintf("%s[fd:%d]", progName, progFd) + return nil +} + +// SetXmitHeadroom sets the xmit headroom (LWT_BPF_MAX_HEADROOM) via netlink RTA_ENCAP. +// maximum headroom is LWT_BPF_MAX_HEADROOM +func (e *BpfEncap) SetXmitHeadroom(headroom int) error { + if headroom > nl.LWT_BPF_MAX_HEADROOM || headroom < 0 { + return fmt.Errorf("invalid headroom size. range is 0 - %d", nl.LWT_BPF_MAX_HEADROOM) + } + e.headroom = headroom + return nil +} + +func (e *BpfEncap) Type() int { + return nl.LWTUNNEL_ENCAP_BPF +} +func (e *BpfEncap) Decode(buf []byte) error { + if len(buf) < 4 { + return fmt.Errorf("lwt bpf decode: lack of bytes") + } + native := nl.NativeEndian() + attrs, err := nl.ParseRouteAttr(buf) + if err != nil { + return fmt.Errorf("lwt bpf decode: failed parsing attribute. err: %v", err) + } + for _, attr := range attrs { + if int(attr.Attr.Type) < 1 { + // nl.LWT_BPF_UNSPEC + continue + } + if int(attr.Attr.Type) > nl.LWT_BPF_MAX { + return fmt.Errorf("lwt bpf decode: received unknown attribute type: %d", attr.Attr.Type) + } + switch int(attr.Attr.Type) { + case nl.LWT_BPF_MAX_HEADROOM: + e.headroom = int(native.Uint32(attr.Value)) + default: + bpfO := bpfObj{} + parsedAttrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return fmt.Errorf("lwt bpf decode: failed parsing route attribute") + } + for _, parsedAttr := range parsedAttrs { + switch int(parsedAttr.Attr.Type) { + case nl.LWT_BPF_PROG_FD: + bpfO.progFd = int(native.Uint32(parsedAttr.Value)) + case nl.LWT_BPF_PROG_NAME: + bpfO.progName = string(parsedAttr.Value) + default: + return fmt.Errorf("lwt bpf decode: received unknown attribute: type: %d, len: %d", parsedAttr.Attr.Type, parsedAttr.Attr.Len) + } + } + e.progs[attr.Attr.Type] = bpfO + } + } + return nil +} + +func (e *BpfEncap) Encode() ([]byte, error) { + buf := make([]byte, 0) + native = nl.NativeEndian() + for index, attr := range e.progs { + nlMsg := nl.NewRtAttr(index, []byte{}) + if attr.progFd != 0 { + nlMsg.AddRtAttr(nl.LWT_BPF_PROG_FD, nl.Uint32Attr(uint32(attr.progFd))) + } + if attr.progName != "" { + nlMsg.AddRtAttr(nl.LWT_BPF_PROG_NAME, nl.ZeroTerminated(attr.progName)) + } + if nlMsg.Len() > 4 { + buf = append(buf, nlMsg.Serialize()...) + } + } + if len(buf) <= 4 { + return nil, fmt.Errorf("lwt bpf encode: bpf obj definitions returned empty buffer") + } + if e.headroom > 0 { + hRoom := nl.NewRtAttr(nl.LWT_BPF_XMIT_HEADROOM, nl.Uint32Attr(uint32(e.headroom))) + buf = append(buf, hRoom.Serialize()...) + } + return buf, nil +} + +func (e *BpfEncap) String() string { + progs := make([]string, 0) + for index, obj := range e.progs { + empty := bpfObj{} + switch index { + case nl.LWT_BPF_IN: + if obj != empty { + progs = append(progs, fmt.Sprintf("in: %s", obj.progName)) + } + case nl.LWT_BPF_OUT: + if obj != empty { + progs = append(progs, fmt.Sprintf("out: %s", obj.progName)) + } + case nl.LWT_BPF_XMIT: + if obj != empty { + progs = append(progs, fmt.Sprintf("xmit: %s", obj.progName)) + } + } + } + if e.headroom > 0 { + progs = append(progs, fmt.Sprintf("xmit headroom: %d", e.headroom)) + } + return strings.Join(progs, " ") +} + +func (e *BpfEncap) Equal(x Encap) bool { + o, ok := x.(*BpfEncap) + if !ok { + return false + } + if e.headroom != o.headroom { + return false + } + for i := range o.progs { + if o.progs[i] != e.progs[i] { + return false + } + } + return true +} + +// IP6tnlEncap definition +type IP6tnlEncap struct { + ID uint64 + Dst net.IP + Src net.IP + Hoplimit uint8 + TC uint8 + Flags uint16 +} + +func (e *IP6tnlEncap) Type() int { + return nl.LWTUNNEL_ENCAP_IP6 +} + +func (e *IP6tnlEncap) Decode(buf []byte) error { + attrs, err := nl.ParseRouteAttr(buf) + if err != nil { + return err + } + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.LWTUNNEL_IP6_ID: + e.ID = uint64(native.Uint64(attr.Value[0:4])) + case nl.LWTUNNEL_IP6_DST: + e.Dst = net.IP(attr.Value[:]) + case nl.LWTUNNEL_IP6_SRC: + e.Src = net.IP(attr.Value[:]) + case nl.LWTUNNEL_IP6_HOPLIMIT: + e.Hoplimit = attr.Value[0] + case nl.LWTUNNEL_IP6_TC: + // e.TC = attr.Value[0] + err = fmt.Errorf("decoding TC in IP6tnlEncap is not supported") + case nl.LWTUNNEL_IP6_FLAGS: + // e.Flags = uint16(native.Uint16(attr.Value[0:2])) + err = fmt.Errorf("decoding FLAG in IP6tnlEncap is not supported") + case nl.LWTUNNEL_IP6_PAD: + err = fmt.Errorf("decoding PAD in IP6tnlEncap is not supported") + case nl.LWTUNNEL_IP6_OPTS: + err = fmt.Errorf("decoding OPTS in IP6tnlEncap is not supported") + } + } + return err +} + +func (e *IP6tnlEncap) Encode() ([]byte, error) { + + final := []byte{} + + resID := make([]byte, 12) + native.PutUint16(resID, 12) // 2+2+8 + native.PutUint16(resID[2:], nl.LWTUNNEL_IP6_ID) + native.PutUint64(resID[4:], 0) + final = append(final, resID...) + + resDst := make([]byte, 4) + native.PutUint16(resDst, 20) // 2+2+16 + native.PutUint16(resDst[2:], nl.LWTUNNEL_IP6_DST) + resDst = append(resDst, e.Dst...) + final = append(final, resDst...) + + resSrc := make([]byte, 4) + native.PutUint16(resSrc, 20) + native.PutUint16(resSrc[2:], nl.LWTUNNEL_IP6_SRC) + resSrc = append(resSrc, e.Src...) + final = append(final, resSrc...) + + // resTc := make([]byte, 5) + // native.PutUint16(resTc, 5) + // native.PutUint16(resTc[2:], nl.LWTUNNEL_IP6_TC) + // resTc[4] = e.TC + // final = append(final,resTc...) + + resHops := make([]byte, 5) + native.PutUint16(resHops, 5) + native.PutUint16(resHops[2:], nl.LWTUNNEL_IP6_HOPLIMIT) + resHops[4] = e.Hoplimit + final = append(final, resHops...) + + // resFlags := make([]byte, 6) + // native.PutUint16(resFlags, 6) + // native.PutUint16(resFlags[2:], nl.LWTUNNEL_IP6_FLAGS) + // native.PutUint16(resFlags[4:], e.Flags) + // final = append(final,resFlags...) + + return final, nil +} + +func (e *IP6tnlEncap) String() string { + return fmt.Sprintf("id %d src %s dst %s hoplimit %d tc %d flags 0x%.4x", e.ID, e.Src, e.Dst, e.Hoplimit, e.TC, e.Flags) +} + +func (e *IP6tnlEncap) Equal(x Encap) bool { + o, ok := x.(*IP6tnlEncap) + if !ok { + return false + } + + if e.ID != o.ID || e.Flags != o.Flags || e.Hoplimit != o.Hoplimit || e.Src.Equal(o.Src) || e.Dst.Equal(o.Dst) || e.TC != o.TC { return false } return true } +type Via struct { + AddrFamily int + Addr net.IP +} + +func (v *Via) Equal(x Destination) bool { + o, ok := x.(*Via) + if !ok { + return false + } + if v.AddrFamily == x.Family() && v.Addr.Equal(o.Addr) { + return true + } + return false +} + +func (v *Via) String() string { + return fmt.Sprintf("Family: %d, Address: %s", v.AddrFamily, v.Addr.String()) +} + +func (v *Via) Family() int { + return v.AddrFamily +} + +func (v *Via) Encode() ([]byte, error) { + buf := &bytes.Buffer{} + err := binary.Write(buf, native, uint16(v.AddrFamily)) + if err != nil { + return nil, err + } + err = binary.Write(buf, native, v.Addr) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func (v *Via) Decode(b []byte) error { + if len(b) < 6 { + return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b)) + } + v.AddrFamily = int(native.Uint16(b[0:2])) + if v.AddrFamily == nl.FAMILY_V4 { + v.Addr = net.IP(b[2:6]) + return nil + } else if v.AddrFamily == nl.FAMILY_V6 { + if len(b) < 18 { + return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b)) + } + v.Addr = net.IP(b[2:]) + return nil + } + return fmt.Errorf("decoding failed: address family %d unknown", v.AddrFamily) +} + // RouteAdd will add a route to the system. // Equivalent to: `ip route add $route` func RouteAdd(route *Route) error { @@ -454,7 +797,51 @@ func RouteAdd(route *Route) error { func (h *Handle) RouteAdd(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_EXCL | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err +} + +// RouteAppend will append a route to the system. +// Equivalent to: `ip route append $route` +func RouteAppend(route *Route) error { + return pkgHandle.RouteAppend(route) +} + +// RouteAppend will append a route to the system. +// Equivalent to: `ip route append $route` +func (h *Handle) RouteAppend(route *Route) error { + flags := unix.NLM_F_CREATE | unix.NLM_F_APPEND | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err +} + +// RouteAddEcmp will add a route to the system. +func RouteAddEcmp(route *Route) error { + return pkgHandle.RouteAddEcmp(route) +} + +// RouteAddEcmp will add a route to the system. +func (h *Handle) RouteAddEcmp(route *Route) error { + flags := unix.NLM_F_CREATE | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err +} + +// RouteChange will change an existing route in the system. +// Equivalent to: `ip route change $route` +func RouteChange(route *Route) error { + return pkgHandle.RouteChange(route) +} + +// RouteChange will change an existing route in the system. +// Equivalent to: `ip route change $route` +func (h *Handle) RouteChange(route *Route) error { + flags := unix.NLM_F_REPLACE | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteReplace will add a route to the system. @@ -468,7 +855,8 @@ func RouteReplace(route *Route) error { func (h *Handle) RouteReplace(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_REPLACE | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteDel will delete a route from the system. @@ -481,12 +869,27 @@ func RouteDel(route *Route) error { // Equivalent to: `ip route del $route` func (h *Handle) RouteDel(route *Route) error { req := h.newNetlinkRequest(unix.RTM_DELROUTE, unix.NLM_F_ACK) - return h.routeHandle(route, req, nl.NewRtDelMsg()) + _, err := h.routeHandle(route, req, nl.NewRtDelMsg()) + return err } -func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error { - if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil { - return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil") +func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) ([][]byte, error) { + if err := h.prepareRouteReq(route, req, msg); err != nil { + return nil, err + } + return req.Execute(unix.NETLINK_ROUTE, 0) +} + +func (h *Handle) routeHandleIter(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg, f func(msg []byte) bool) error { + if err := h.prepareRouteReq(route, req, msg); err != nil { + return err + } + return req.ExecuteIter(unix.NETLINK_ROUTE, 0, f) +} + +func (h *Handle) prepareRouteReq(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error { + if req.NlMsghdr.Type != unix.RTM_GETROUTE && (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil { + return fmt.Errorf("either Dst.IP, Src.IP or Gw must be set") } family := -1 @@ -530,7 +933,13 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg if err != nil { return err } - rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf)) + switch route.Encap.Type() { + case nl.LWTUNNEL_ENCAP_BPF: + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP|unix.NLA_F_NESTED, buf)) + default: + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf)) + } + } if route.Src != nil { @@ -564,6 +973,14 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_GATEWAY, gwData)) } + if route.Via != nil { + buf, err := route.Via.Encode() + if err != nil { + return fmt.Errorf("failed to encode RTA_VIA: %v", err) + } + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_VIA, buf)) + } + if len(route.MultiPath) > 0 { buf := []byte{} for _, nh := range route.MultiPath { @@ -606,6 +1023,13 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg } children = append(children, nl.NewRtAttr(unix.RTA_ENCAP, buf)) } + if nh.Via != nil { + buf, err := nh.Via.Encode() + if err != nil { + return err + } + children = append(children, nl.NewRtAttr(unix.RTA_VIA, buf)) + } rtnh.Children = children buf = append(buf, rtnh.Serialize()...) } @@ -628,6 +1052,11 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg native.PutUint32(b, uint32(route.Priority)) rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PRIORITY, b)) } + if route.Realm > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(route.Realm)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_FLOW, b)) + } if route.Tos > 0 { msg.Tos = uint8(route.Tos) } @@ -639,19 +1068,70 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg } var metrics []*nl.RtAttr - // TODO: support other rta_metric values if route.MTU > 0 { b := nl.Uint32Attr(uint32(route.MTU)) metrics = append(metrics, nl.NewRtAttr(unix.RTAX_MTU, b)) } + if route.Window > 0 { + b := nl.Uint32Attr(uint32(route.Window)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_WINDOW, b)) + } + if route.Rtt > 0 { + b := nl.Uint32Attr(uint32(route.Rtt)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTT, b)) + } + if route.RttVar > 0 { + b := nl.Uint32Attr(uint32(route.RttVar)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTTVAR, b)) + } + if route.Ssthresh > 0 { + b := nl.Uint32Attr(uint32(route.Ssthresh)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_SSTHRESH, b)) + } + if route.Cwnd > 0 { + b := nl.Uint32Attr(uint32(route.Cwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_CWND, b)) + } if route.AdvMSS > 0 { b := nl.Uint32Attr(uint32(route.AdvMSS)) metrics = append(metrics, nl.NewRtAttr(unix.RTAX_ADVMSS, b)) } + if route.Reordering > 0 { + b := nl.Uint32Attr(uint32(route.Reordering)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_REORDERING, b)) + } if route.Hoplimit > 0 { b := nl.Uint32Attr(uint32(route.Hoplimit)) metrics = append(metrics, nl.NewRtAttr(unix.RTAX_HOPLIMIT, b)) } + if route.InitCwnd > 0 { + b := nl.Uint32Attr(uint32(route.InitCwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_INITCWND, b)) + } + if route.Features > 0 { + b := nl.Uint32Attr(uint32(route.Features)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_FEATURES, b)) + } + if route.RtoMin > 0 { + b := nl.Uint32Attr(uint32(route.RtoMin)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTO_MIN, b)) + } + if route.InitRwnd > 0 { + b := nl.Uint32Attr(uint32(route.InitRwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_INITRWND, b)) + } + if route.QuickACK > 0 { + b := nl.Uint32Attr(uint32(route.QuickACK)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_QUICKACK, b)) + } + if route.Congctl != "" { + b := nl.ZeroTerminated(route.Congctl) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_CC_ALGO, b)) + } + if route.FastOpenNoCookie > 0 { + b := nl.Uint32Attr(uint32(route.FastOpenNoCookie)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_FASTOPEN_NO_COOKIE, b)) + } if metrics != nil { attr := nl.NewRtAttr(unix.RTA_METRICS, nil) @@ -663,22 +1143,21 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg msg.Flags = uint32(route.Flags) msg.Scope = uint8(route.Scope) - msg.Family = uint8(family) + // only overwrite family if it was not set in msg + if msg.Family == 0 { + msg.Family = uint8(family) + } req.AddData(msg) for _, attr := range rtAttrs { req.AddData(attr) } - var ( - b = make([]byte, 4) - native = nl.NativeEndian() - ) - native.PutUint32(b, uint32(route.LinkIndex)) - - req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) - - _, err := req.Execute(unix.NETLINK_ROUTE, 0) - return err + if (req.NlMsghdr.Type != unix.RTM_GETROUTE) || (req.NlMsghdr.Type == unix.RTM_GETROUTE && route.LinkIndex > 0) { + b := make([]byte, 4) + native.PutUint32(b, uint32(route.LinkIndex)) + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + return nil } // RouteList gets a list of routes in the system. @@ -692,13 +1171,13 @@ func RouteList(link Link, family int) ([]Route, error) { // Equivalent to: `ip route show`. // The list can be filtered by link and ip family. func (h *Handle) RouteList(link Link, family int) ([]Route, error) { - var routeFilter *Route + routeFilter := &Route{} if link != nil { - routeFilter = &Route{ - LinkIndex: link.Attrs().Index, - } + routeFilter.LinkIndex = link.Attrs().Index + + return h.RouteListFiltered(family, routeFilter, RT_FILTER_OIF) } - return h.RouteListFiltered(family, routeFilter, RT_FILTER_OIF) + return h.RouteListFiltered(family, routeFilter, 0) } // RouteListFiltered gets a list of routes in the system filtered with specified rules. @@ -710,65 +1189,94 @@ func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, e // RouteListFiltered gets a list of routes in the system filtered with specified rules. // All rules must be defined in RouteFilter struct func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { - req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP) - infmsg := nl.NewIfInfomsg(family) - req.AddData(infmsg) - - msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) + var res []Route + err := h.RouteListFilteredIter(family, filter, filterMask, func(route Route) (cont bool) { + res = append(res, route) + return true + }) if err != nil { return nil, err } + return res, nil +} - var res []Route - for _, m := range msgs { +// RouteListFilteredIter passes each route that matches the filter to the given iterator func. Iteration continues +// until all routes are loaded or the func returns false. +func RouteListFilteredIter(family int, filter *Route, filterMask uint64, f func(Route) (cont bool)) error { + return pkgHandle.RouteListFilteredIter(family, filter, filterMask, f) +} + +func (h *Handle) RouteListFilteredIter(family int, filter *Route, filterMask uint64, f func(Route) (cont bool)) error { + req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP) + rtmsg := &nl.RtMsg{} + rtmsg.Family = uint8(family) + + var parseErr error + err := h.routeHandleIter(filter, req, rtmsg, func(m []byte) bool { msg := nl.DeserializeRtMsg(m) + if family != FAMILY_ALL && msg.Family != uint8(family) { + // Ignore routes not matching requested family + return true + } if msg.Flags&unix.RTM_F_CLONED != 0 { // Ignore cloned routes - continue + return true } if msg.Table != unix.RT_TABLE_MAIN { - if filter == nil || filter != nil && filterMask&RT_FILTER_TABLE == 0 { + if filter == nil || filterMask&RT_FILTER_TABLE == 0 { // Ignore non-main tables - continue + return true } } route, err := deserializeRoute(m) if err != nil { - return nil, err + parseErr = err + return false } if filter != nil { switch { case filterMask&RT_FILTER_TABLE != 0 && filter.Table != unix.RT_TABLE_UNSPEC && route.Table != filter.Table: - continue + return true case filterMask&RT_FILTER_PROTOCOL != 0 && route.Protocol != filter.Protocol: - continue + return true case filterMask&RT_FILTER_SCOPE != 0 && route.Scope != filter.Scope: - continue + return true case filterMask&RT_FILTER_TYPE != 0 && route.Type != filter.Type: - continue + return true case filterMask&RT_FILTER_TOS != 0 && route.Tos != filter.Tos: - continue + return true + case filterMask&RT_FILTER_REALM != 0 && route.Realm != filter.Realm: + return true case filterMask&RT_FILTER_OIF != 0 && route.LinkIndex != filter.LinkIndex: - continue + return true case filterMask&RT_FILTER_IIF != 0 && route.ILinkIndex != filter.ILinkIndex: - continue + return true case filterMask&RT_FILTER_GW != 0 && !route.Gw.Equal(filter.Gw): - continue + return true case filterMask&RT_FILTER_SRC != 0 && !route.Src.Equal(filter.Src): - continue + return true case filterMask&RT_FILTER_DST != 0: if filter.MPLSDst == nil || route.MPLSDst == nil || (*filter.MPLSDst) != (*route.MPLSDst) { + if filter.Dst == nil { + filter.Dst = genZeroIPNet(family) + } if !ipNetEqual(route.Dst, filter.Dst) { - continue + return true } } case filterMask&RT_FILTER_HOPLIMIT != 0 && route.Hoplimit != filter.Hoplimit: - continue + return true } } - res = append(res, route) + return f(route) + }) + if err != nil { + return err } - return res, nil + if parseErr != nil { + return parseErr + } + return nil } // deserializeRoute decodes a binary netlink message into a Route struct @@ -780,14 +1288,14 @@ func deserializeRoute(m []byte) (Route, error) { } route := Route{ Scope: Scope(msg.Scope), - Protocol: int(msg.Protocol), + Protocol: RouteProtocol(int(msg.Protocol)), Table: int(msg.Table), Type: int(msg.Type), Tos: int(msg.Tos), Flags: int(msg.Flags), + Family: int(msg.Family), } - native := nl.NativeEndian() var encap, encapType syscall.NetlinkRouteAttr for _, attr := range attrs { switch attr.Attr.Type { @@ -814,6 +1322,8 @@ func deserializeRoute(m []byte) (Route, error) { route.ILinkIndex = int(native.Uint32(attr.Value[0:4])) case unix.RTA_PRIORITY: route.Priority = int(native.Uint32(attr.Value[0:4])) + case unix.RTA_FLOW: + route.Realm = int(native.Uint32(attr.Value[0:4])) case unix.RTA_TABLE: route.Table = int(native.Uint32(attr.Value[0:4])) case unix.RTA_MULTIPATH: @@ -853,6 +1363,12 @@ func deserializeRoute(m []byte) (Route, error) { encapType = attr case unix.RTA_ENCAP: encap = attr + case unix.RTA_VIA: + d := &Via{} + if err := d.Decode(attr.Value); err != nil { + return nil, nil, err + } + info.Via = d } } @@ -890,6 +1406,12 @@ func deserializeRoute(m []byte) (Route, error) { return route, err } route.NewDst = d + case unix.RTA_VIA: + v := &Via{} + if err := v.Decode(attr.Value); err != nil { + return route, err + } + route.Via = v case unix.RTA_ENCAP_TYPE: encapType = attr case unix.RTA_ENCAP: @@ -903,15 +1425,62 @@ func deserializeRoute(m []byte) (Route, error) { switch metric.Attr.Type { case unix.RTAX_MTU: route.MTU = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_WINDOW: + route.Window = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTT: + route.Rtt = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTTVAR: + route.RttVar = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_SSTHRESH: + route.Ssthresh = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_CWND: + route.Cwnd = int(native.Uint32(metric.Value[0:4])) case unix.RTAX_ADVMSS: route.AdvMSS = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_REORDERING: + route.Reordering = int(native.Uint32(metric.Value[0:4])) case unix.RTAX_HOPLIMIT: route.Hoplimit = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_INITCWND: + route.InitCwnd = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_FEATURES: + route.Features = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTO_MIN: + route.RtoMin = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_INITRWND: + route.InitRwnd = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_QUICKACK: + route.QuickACK = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_CC_ALGO: + route.Congctl = nl.BytesToString(metric.Value) + case unix.RTAX_FASTOPEN_NO_COOKIE: + route.FastOpenNoCookie = int(native.Uint32(metric.Value[0:4])) } } } } + // Same logic to generate "default" dst with iproute2 implementation + if route.Dst == nil { + var addLen int + var ip net.IP + switch msg.Family { + case FAMILY_V4: + addLen = net.IPv4len + ip = net.IPv4zero + case FAMILY_V6: + addLen = net.IPv6len + ip = net.IPv6zero + } + + if addLen != 0 { + route.Dst = &net.IPNet{ + IP: ip, + Mask: net.CIDRMask(int(msg.Dst_len), 8*addLen), + } + } + } + if len(encap.Value) != 0 && len(encapType.Value) != 0 { typ := int(native.Uint16(encapType.Value[0:2])) var e Encap @@ -931,6 +1500,11 @@ func deserializeRoute(m []byte) (Route, error) { if err := e.Decode(encap.Value); err != nil { return route, err } + case nl.LWTUNNEL_ENCAP_BPF: + e = &BpfEncap{} + if err := e.Decode(encap.Value); err != nil { + return route, err + } } route.Encap = e } @@ -938,15 +1512,34 @@ func deserializeRoute(m []byte) (Route, error) { return route, nil } +// RouteGetOptions contains a set of options to use with +// RouteGetWithOptions +type RouteGetOptions struct { + Iif string + IifIndex int + Oif string + VrfName string + SrcAddr net.IP + UID *uint32 + Mark uint32 + FIBMatch bool +} + +// RouteGetWithOptions gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get <> vrf '. +func RouteGetWithOptions(destination net.IP, options *RouteGetOptions) ([]Route, error) { + return pkgHandle.RouteGetWithOptions(destination, options) +} + // RouteGet gets a route to a specific destination from the host system. // Equivalent to: 'ip route get'. func RouteGet(destination net.IP) ([]Route, error) { return pkgHandle.RouteGet(destination) } -// RouteGet gets a route to a specific destination from the host system. -// Equivalent to: 'ip route get'. -func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { +// RouteGetWithOptions gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get <> vrf '. +func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOptions) ([]Route, error) { req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_REQUEST) family := nl.GetIPFamily(destination) var destinationData []byte @@ -961,11 +1554,88 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { msg := &nl.RtMsg{} msg.Family = uint8(family) msg.Dst_len = bitlen + if options != nil && options.SrcAddr != nil { + msg.Src_len = bitlen + } + msg.Flags = unix.RTM_F_LOOKUP_TABLE + if options != nil && options.FIBMatch { + msg.Flags |= unix.RTM_F_FIB_MATCH + } req.AddData(msg) rtaDst := nl.NewRtAttr(unix.RTA_DST, destinationData) req.AddData(rtaDst) + if options != nil { + if options.VrfName != "" { + link, err := h.LinkByName(options.VrfName) + if err != nil { + return nil, err + } + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs().Index)) + + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + + iifIndex := 0 + if len(options.Iif) > 0 { + link, err := h.LinkByName(options.Iif) + if err != nil { + return nil, err + } + + iifIndex = link.Attrs().Index + } else if options.IifIndex > 0 { + iifIndex = options.IifIndex + } + + if iifIndex > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(iifIndex)) + + req.AddData(nl.NewRtAttr(unix.RTA_IIF, b)) + } + + if len(options.Oif) > 0 { + link, err := h.LinkByName(options.Oif) + if err != nil { + return nil, err + } + + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs().Index)) + + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + + if options.SrcAddr != nil { + var srcAddr []byte + if family == FAMILY_V4 { + srcAddr = options.SrcAddr.To4() + } else { + srcAddr = options.SrcAddr.To16() + } + + req.AddData(nl.NewRtAttr(unix.RTA_SRC, srcAddr)) + } + + if options.UID != nil { + uid := *options.UID + b := make([]byte, 4) + native.PutUint32(b, uid) + + req.AddData(nl.NewRtAttr(unix.RTA_UID, b)) + } + + if options.Mark > 0 { + b := make([]byte, 4) + native.PutUint32(b, options.Mark) + + req.AddData(nl.NewRtAttr(unix.RTA_MARK, b)) + } + } + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) if err != nil { return nil, err @@ -980,27 +1650,35 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { res = append(res, route) } return res, nil +} +// RouteGet gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get'. +func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { + return h.RouteGetWithOptions(destination, nil) } // RouteSubscribe takes a chan down which notifications will be sent // when routes are added or deleted. Close the 'done' chan to stop subscription. func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error { - return routeSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) + return routeSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil, false) } // RouteSubscribeAt works like RouteSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func RouteSubscribeAt(ns netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error { - return routeSubscribeAt(ns, netns.None(), ch, done, nil, false) + return routeSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil, false) } // RouteSubscribeOptions contains a set of options to use with // RouteSubscribeWithOptions. type RouteSubscribeOptions struct { - Namespace *netns.NsHandle - ErrorCallback func(error) - ListExisting bool + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool + ReceiveBufferSize int + ReceiveBufferForceSize bool + ReceiveTimeout *unix.Timeval } // RouteSubscribeWithOptions work like RouteSubscribe but enable to @@ -1011,14 +1689,27 @@ func RouteSubscribeWithOptions(ch chan<- RouteUpdate, done <-chan struct{}, opti none := netns.None() options.Namespace = &none } - return routeSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) + return routeSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, + options.ReceiveBufferSize, options.ReceiveTimeout, options.ReceiveBufferForceSize) } -func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { +func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}, cberr func(error), listExisting bool, + rcvbuf int, rcvTimeout *unix.Timeval, rcvbufForce bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_ROUTE, unix.RTNLGRP_IPV6_ROUTE) if err != nil { return err } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + if rcvbuf != 0 { + err = s.SetReceiveBufferSize(rcvbuf, rcvbufForce) + if err != nil { + return err + } + } if done != nil { go func() { <-done @@ -1040,7 +1731,8 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done < msgs, from, err := s.Receive() if err != nil { if cberr != nil { - cberr(err) + cberr(fmt.Errorf("Receive failed: %v", + err)) } return } @@ -1055,27 +1747,103 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done < continue } if m.Header.Type == unix.NLMSG_ERROR { - native := nl.NativeEndian() error := int32(native.Uint32(m.Data[0:4])) if error == 0 { continue } if cberr != nil { - cberr(syscall.Errno(-error)) + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) } - return + continue } route, err := deserializeRoute(m.Data) if err != nil { if cberr != nil { cberr(err) } - return + continue + } + ch <- RouteUpdate{ + Type: m.Header.Type, + NlFlags: m.Header.Flags & (unix.NLM_F_REPLACE | unix.NLM_F_EXCL | unix.NLM_F_CREATE | unix.NLM_F_APPEND), + Route: route, } - ch <- RouteUpdate{Type: m.Header.Type, Route: route} } } }() return nil } + +func (p RouteProtocol) String() string { + switch int(p) { + case unix.RTPROT_BABEL: + return "babel" + case unix.RTPROT_BGP: + return "bgp" + case unix.RTPROT_BIRD: + return "bird" + case unix.RTPROT_BOOT: + return "boot" + case unix.RTPROT_DHCP: + return "dhcp" + case unix.RTPROT_DNROUTED: + return "dnrouted" + case unix.RTPROT_EIGRP: + return "eigrp" + case unix.RTPROT_GATED: + return "gated" + case unix.RTPROT_ISIS: + return "isis" + // case unix.RTPROT_KEEPALIVED: + // return "keepalived" + case unix.RTPROT_KERNEL: + return "kernel" + case unix.RTPROT_MROUTED: + return "mrouted" + case unix.RTPROT_MRT: + return "mrt" + case unix.RTPROT_NTK: + return "ntk" + case unix.RTPROT_OSPF: + return "ospf" + case unix.RTPROT_RA: + return "ra" + case unix.RTPROT_REDIRECT: + return "redirect" + case unix.RTPROT_RIP: + return "rip" + case unix.RTPROT_STATIC: + return "static" + case unix.RTPROT_UNSPEC: + return "unspec" + case unix.RTPROT_XORP: + return "xorp" + case unix.RTPROT_ZEBRA: + return "zebra" + default: + return strconv.Itoa(int(p)) + } +} + +// genZeroIPNet returns 0.0.0.0/0 or ::/0 for IPv4 or IPv6, otherwise nil +func genZeroIPNet(family int) *net.IPNet { + var addLen int + var ip net.IP + switch family { + case FAMILY_V4: + addLen = net.IPv4len + ip = net.IPv4zero + case FAMILY_V6: + addLen = net.IPv6len + ip = net.IPv6zero + } + if addLen != 0 { + return &net.IPNet{ + IP: ip, + Mask: net.CIDRMask(0, 8*addLen), + } + } + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/route_unspecified.go b/vendor/github.com/vishvananda/netlink/route_unspecified.go index 2701862b4b9..db737268958 100644 --- a/vendor/github.com/vishvananda/netlink/route_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/route_unspecified.go @@ -2,6 +2,8 @@ package netlink +import "strconv" + func (r *Route) ListFlags() []string { return []string{} } @@ -9,3 +11,11 @@ func (r *Route) ListFlags() []string { func (n *NexthopInfo) ListFlags() []string { return []string{} } + +func (s Scope) String() string { + return "unknown" +} + +func (p RouteProtocol) String() string { + return strconv.Itoa(int(p)) +} diff --git a/vendor/github.com/vishvananda/netlink/rule.go b/vendor/github.com/vishvananda/netlink/rule.go index 7fc8ae5df15..9d74c7cd8a9 100644 --- a/vendor/github.com/vishvananda/netlink/rule.go +++ b/vendor/github.com/vishvananda/netlink/rule.go @@ -10,8 +10,9 @@ type Rule struct { Priority int Family int Table int - Mark int - Mask int + Mark uint32 + Mask *uint32 + Tos uint TunID uint Goto int Src *net.IPNet @@ -22,10 +23,27 @@ type Rule struct { SuppressIfgroup int SuppressPrefixlen int Invert bool + Dport *RulePortRange + Sport *RulePortRange + IPProto int + UIDRange *RuleUIDRange + Protocol uint8 + Type uint8 } func (r Rule) String() string { - return fmt.Sprintf("ip rule %d: from %s table %d", r.Priority, r.Src, r.Table) + from := "all" + if r.Src != nil && r.Src.String() != "" { + from = r.Src.String() + } + + to := "all" + if r.Dst != nil && r.Dst.String() != "" { + to = r.Dst.String() + } + + return fmt.Sprintf("ip rule %d: from %s to %s table %d %s", + r.Priority, from, to, r.Table, r.typeString()) } // NewRule return empty rules. @@ -34,9 +52,31 @@ func NewRule() *Rule { SuppressIfgroup: -1, SuppressPrefixlen: -1, Priority: -1, - Mark: -1, - Mask: -1, + Mark: 0, + Mask: nil, Goto: -1, Flow: -1, } } + +// NewRulePortRange creates rule sport/dport range. +func NewRulePortRange(start, end uint16) *RulePortRange { + return &RulePortRange{Start: start, End: end} +} + +// RulePortRange represents rule sport/dport range. +type RulePortRange struct { + Start uint16 + End uint16 +} + +// NewRuleUIDRange creates rule uid range. +func NewRuleUIDRange(start, end uint32) *RuleUIDRange { + return &RuleUIDRange{Start: start, End: end} +} + +// RuleUIDRange represents rule uid range. +type RuleUIDRange struct { + Start uint32 + End uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/rule_linux.go b/vendor/github.com/vishvananda/netlink/rule_linux.go index e12569fe45c..ddff99cfad2 100644 --- a/vendor/github.com/vishvananda/netlink/rule_linux.go +++ b/vendor/github.com/vishvananda/netlink/rule_linux.go @@ -1,6 +1,7 @@ package netlink import ( + "bytes" "fmt" "net" @@ -42,8 +43,8 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { msg.Protocol = unix.RTPROT_BOOT msg.Scope = unix.RT_SCOPE_UNIVERSE msg.Table = unix.RT_TABLE_UNSPEC - msg.Type = unix.RTN_UNSPEC - if req.NlMsghdr.Flags&unix.NLM_F_CREATE > 0 { + msg.Type = rule.Type // usually 0, same as unix.RTN_UNSPEC + if msg.Type == 0 && req.NlMsghdr.Flags&unix.NLM_F_CREATE > 0 { msg.Type = unix.RTN_UNICAST } if rule.Invert { @@ -55,6 +56,9 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { if rule.Table >= 0 && rule.Table < 256 { msg.Table = uint8(rule.Table) } + if rule.Tos != 0 { + msg.Tos = uint8(rule.Tos) + } var dstFamily uint8 var rtAttrs []*nl.RtAttr @@ -93,21 +97,19 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { req.AddData(rtAttrs[i]) } - native := nl.NativeEndian() - if rule.Priority >= 0 { b := make([]byte, 4) native.PutUint32(b, uint32(rule.Priority)) req.AddData(nl.NewRtAttr(nl.FRA_PRIORITY, b)) } - if rule.Mark >= 0 { + if rule.Mark != 0 || rule.Mask != nil { b := make([]byte, 4) - native.PutUint32(b, uint32(rule.Mark)) + native.PutUint32(b, rule.Mark) req.AddData(nl.NewRtAttr(nl.FRA_FWMARK, b)) } - if rule.Mask >= 0 { + if rule.Mask != nil { b := make([]byte, 4) - native.PutUint32(b, uint32(rule.Mask)) + native.PutUint32(b, *rule.Mask) req.AddData(nl.NewRtAttr(nl.FRA_FWMASK, b)) } if rule.Flow >= 0 { @@ -138,10 +140,10 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { } } if rule.IifName != "" { - req.AddData(nl.NewRtAttr(nl.FRA_IIFNAME, []byte(rule.IifName))) + req.AddData(nl.NewRtAttr(nl.FRA_IIFNAME, []byte(rule.IifName+"\x00"))) } if rule.OifName != "" { - req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName))) + req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName+"\x00"))) } if rule.Goto >= 0 { msg.Type = nl.FR_ACT_GOTO @@ -150,6 +152,31 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { req.AddData(nl.NewRtAttr(nl.FRA_GOTO, b)) } + if rule.IPProto > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.IPProto)) + req.AddData(nl.NewRtAttr(nl.FRA_IP_PROTO, b)) + } + + if rule.Dport != nil { + b := rule.Dport.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_DPORT_RANGE, b)) + } + + if rule.Sport != nil { + b := rule.Sport.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_SPORT_RANGE, b)) + } + + if rule.UIDRange != nil { + b := rule.UIDRange.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_UID_RANGE, b)) + } + + if rule.Protocol > 0 { + req.AddData(nl.NewRtAttr(nl.FRA_PROTOCOL, nl.Uint8Attr(rule.Protocol))) + } + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -163,6 +190,19 @@ func RuleList(family int) ([]Rule, error) { // RuleList lists rules in the system. // Equivalent to: ip rule list func (h *Handle) RuleList(family int) ([]Rule, error) { + return h.RuleListFiltered(family, nil, 0) +} + +// RuleListFiltered gets a list of rules in the system filtered by the +// specified rule template `filter`. +// Equivalent to: ip rule list +func RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) { + return pkgHandle.RuleListFiltered(family, filter, filterMask) +} + +// RuleListFiltered lists rules in the system. +// Equivalent to: ip rule list +func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) { req := h.newNetlinkRequest(unix.RTM_GETRULE, unix.NLM_F_DUMP|unix.NLM_F_REQUEST) msg := nl.NewIfInfomsg(family) req.AddData(msg) @@ -172,7 +212,6 @@ func (h *Handle) RuleList(family int) ([]Rule, error) { return nil, err } - native := nl.NativeEndian() var res = make([]Rule, 0) for i := range msgs { msg := nl.DeserializeRtMsg(msgs[i]) @@ -182,8 +221,11 @@ func (h *Handle) RuleList(family int) ([]Rule, error) { } rule := NewRule() + rule.Priority = 0 // The default priority from kernel rule.Invert = msg.Flags&FibRuleInvert > 0 + rule.Family = int(msg.Family) + rule.Tos = uint(msg.Tos) for j := range attrs { switch attrs[j].Attr.Type { @@ -200,11 +242,12 @@ func (h *Handle) RuleList(family int) ([]Rule, error) { Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attrs[j].Value)), } case nl.FRA_FWMARK: - rule.Mark = int(native.Uint32(attrs[j].Value[0:4])) + rule.Mark = native.Uint32(attrs[j].Value[0:4]) case nl.FRA_FWMASK: - rule.Mask = int(native.Uint32(attrs[j].Value[0:4])) + mask := native.Uint32(attrs[j].Value[0:4]) + rule.Mask = &mask case nl.FRA_TUN_ID: - rule.TunID = uint(native.Uint64(attrs[j].Value[0:4])) + rule.TunID = uint(native.Uint64(attrs[j].Value[0:8])) case nl.FRA_IIFNAME: rule.IifName = string(attrs[j].Value[:len(attrs[j].Value)-1]) case nl.FRA_OIFNAME: @@ -225,10 +268,98 @@ func (h *Handle) RuleList(family int) ([]Rule, error) { rule.Goto = int(native.Uint32(attrs[j].Value[0:4])) case nl.FRA_PRIORITY: rule.Priority = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_IP_PROTO: + rule.IPProto = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_DPORT_RANGE: + rule.Dport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) + case nl.FRA_SPORT_RANGE: + rule.Sport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) + case nl.FRA_UID_RANGE: + rule.UIDRange = NewRuleUIDRange(native.Uint32(attrs[j].Value[0:4]), native.Uint32(attrs[j].Value[4:8])) + case nl.FRA_PROTOCOL: + rule.Protocol = uint8(attrs[j].Value[0]) + } + } + + if filter != nil { + switch { + case filterMask&RT_FILTER_SRC != 0 && + (rule.Src == nil || rule.Src.String() != filter.Src.String()): + continue + case filterMask&RT_FILTER_DST != 0 && + (rule.Dst == nil || rule.Dst.String() != filter.Dst.String()): + continue + case filterMask&RT_FILTER_TABLE != 0 && + filter.Table != unix.RT_TABLE_UNSPEC && rule.Table != filter.Table: + continue + case filterMask&RT_FILTER_TOS != 0 && rule.Tos != filter.Tos: + continue + case filterMask&RT_FILTER_PRIORITY != 0 && rule.Priority != filter.Priority: + continue + case filterMask&RT_FILTER_MARK != 0 && rule.Mark != filter.Mark: + continue + case filterMask&RT_FILTER_MASK != 0 && !ptrEqual(rule.Mask, filter.Mask): + continue } } + res = append(res, *rule) } return res, nil } + +func (pr *RulePortRange) toRtAttrData() []byte { + b := [][]byte{make([]byte, 2), make([]byte, 2)} + native.PutUint16(b[0], pr.Start) + native.PutUint16(b[1], pr.End) + return bytes.Join(b, []byte{}) +} + +func (pr *RuleUIDRange) toRtAttrData() []byte { + b := [][]byte{make([]byte, 4), make([]byte, 4)} + native.PutUint32(b[0], pr.Start) + native.PutUint32(b[1], pr.End) + return bytes.Join(b, []byte{}) +} + +func ptrEqual(a, b *uint32) bool { + if a == b { + return true + } + if (a == nil) || (b == nil) { + return false + } + return *a == *b +} + +func (r Rule) typeString() string { + switch r.Type { + case unix.RTN_UNSPEC: // zero + return "" + case unix.RTN_UNICAST: + return "" + case unix.RTN_LOCAL: + return "local" + case unix.RTN_BROADCAST: + return "broadcast" + case unix.RTN_ANYCAST: + return "anycast" + case unix.RTN_MULTICAST: + return "multicast" + case unix.RTN_BLACKHOLE: + return "blackhole" + case unix.RTN_UNREACHABLE: + return "unreachable" + case unix.RTN_PROHIBIT: + return "prohibit" + case unix.RTN_THROW: + return "throw" + case unix.RTN_NAT: + return "nat" + case unix.RTN_XRESOLVE: + return "xresolve" + default: + return fmt.Sprintf("type(0x%x)", r.Type) + } +} diff --git a/vendor/github.com/vishvananda/netlink/rule_nonlinux.go b/vendor/github.com/vishvananda/netlink/rule_nonlinux.go new file mode 100644 index 00000000000..2b19aa64c7d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/rule_nonlinux.go @@ -0,0 +1,8 @@ +//go:build !linux +// +build !linux + +package netlink + +func (r Rule) typeString() string { + return "" +} diff --git a/vendor/github.com/vishvananda/netlink/socket.go b/vendor/github.com/vishvananda/netlink/socket.go index 41aa726245b..e65efb130f5 100644 --- a/vendor/github.com/vishvananda/netlink/socket.go +++ b/vendor/github.com/vishvananda/netlink/socket.go @@ -25,3 +25,80 @@ type Socket struct { UID uint32 INode uint32 } + +// UnixSocket represents a netlink unix socket. +type UnixSocket struct { + Type uint8 + Family uint8 + State uint8 + pad uint8 + INode uint32 + Cookie [2]uint32 +} + +// XDPSocket represents an XDP socket (and the common diagnosis part in +// particular). Please note that in contrast to [UnixSocket] the XDPSocket type +// does not feature “State” information. +type XDPSocket struct { + // xdp_diag_msg + // https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L21 + Family uint8 + Type uint8 + pad uint16 + Ino uint32 + Cookie [2]uint32 +} + +type XDPInfo struct { + // XDP_DIAG_INFO/xdp_diag_info + // https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L51 + Ifindex uint32 + QueueID uint32 + + // XDP_DIAG_UID + UID uint32 + + // XDP_RX_RING + // https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L56 + RxRingEntries uint32 + TxRingEntries uint32 + UmemFillRingEntries uint32 + UmemCompletionRingEntries uint32 + + // XDR_DIAG_UMEM + Umem *XDPDiagUmem + + // XDR_DIAG_STATS + Stats *XDPDiagStats +} + +const ( + XDP_DU_F_ZEROCOPY = 1 << iota +) + +// XDPDiagUmem describes the umem attached to an XDP socket. +// +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L62 +type XDPDiagUmem struct { + Size uint64 + ID uint32 + NumPages uint32 + ChunkSize uint32 + Headroom uint32 + Ifindex uint32 + QueueID uint32 + Flags uint32 + Refs uint32 +} + +// XDPDiagStats contains ring statistics for an XDP socket. +// +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L74 +type XDPDiagStats struct { + RxDropped uint64 + RxInvalid uint64 + RxFull uint64 + FillRingEmpty uint64 + TxInvalid uint64 + TxRingEmpty uint64 +} diff --git a/vendor/github.com/vishvananda/netlink/socket_linux.go b/vendor/github.com/vishvananda/netlink/socket_linux.go index c4d89c17ed5..4eb4aeafbdf 100644 --- a/vendor/github.com/vishvananda/netlink/socket_linux.go +++ b/vendor/github.com/vishvananda/netlink/socket_linux.go @@ -4,15 +4,18 @@ import ( "errors" "fmt" "net" + "syscall" "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" ) const ( - sizeofSocketID = 0x30 - sizeofSocketRequest = sizeofSocketID + 0x8 - sizeofSocket = sizeofSocketID + 0x18 + sizeofSocketID = 0x30 + sizeofSocketRequest = sizeofSocketID + 0x8 + sizeofSocket = sizeofSocketID + 0x18 + sizeofUnixSocketRequest = 0x18 // 24 byte + sizeofUnixSocket = 0x10 // 16 byte ) type socketRequest struct { @@ -49,10 +52,13 @@ func (r *socketRequest) Serialize() []byte { native.PutUint32(b.Next(4), r.States) networkOrder.PutUint16(b.Next(2), r.ID.SourcePort) networkOrder.PutUint16(b.Next(2), r.ID.DestinationPort) - copy(b.Next(4), r.ID.Source.To4()) - b.Next(12) - copy(b.Next(4), r.ID.Destination.To4()) - b.Next(12) + if r.Family == unix.AF_INET6 { + copy(b.Next(16), r.ID.Source) + copy(b.Next(16), r.ID.Destination) + } else { + copy(b.Next(16), r.ID.Source.To4()) + copy(b.Next(16), r.ID.Destination.To4()) + } native.PutUint32(b.Next(4), r.ID.Interface) native.PutUint32(b.Next(4), r.ID.Cookie[0]) native.PutUint32(b.Next(4), r.ID.Cookie[1]) @@ -61,6 +67,32 @@ func (r *socketRequest) Serialize() []byte { func (r *socketRequest) Len() int { return sizeofSocketRequest } +// According to linux/include/uapi/linux/unix_diag.h +type unixSocketRequest struct { + Family uint8 + Protocol uint8 + pad uint16 + States uint32 + INode uint32 + Show uint32 + Cookie [2]uint32 +} + +func (r *unixSocketRequest) Serialize() []byte { + b := writeBuffer{Bytes: make([]byte, sizeofUnixSocketRequest)} + b.Write(r.Family) + b.Write(r.Protocol) + native.PutUint16(b.Next(2), r.pad) + native.PutUint32(b.Next(4), r.States) + native.PutUint32(b.Next(4), r.INode) + native.PutUint32(b.Next(4), r.Show) + native.PutUint32(b.Next(4), r.Cookie[0]) + native.PutUint32(b.Next(4), r.Cookie[1]) + return b.Bytes +} + +func (r *unixSocketRequest) Len() int { return sizeofUnixSocketRequest } + type readBuffer struct { Bytes []byte pos int @@ -89,10 +121,15 @@ func (s *Socket) deserialize(b []byte) error { s.Retrans = rb.Read() s.ID.SourcePort = networkOrder.Uint16(rb.Next(2)) s.ID.DestinationPort = networkOrder.Uint16(rb.Next(2)) - s.ID.Source = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) - rb.Next(12) - s.ID.Destination = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) - rb.Next(12) + if s.Family == unix.AF_INET6 { + s.ID.Source = net.IP(rb.Next(16)) + s.ID.Destination = net.IP(rb.Next(16)) + } else { + s.ID.Source = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) + rb.Next(12) + s.ID.Destination = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) + rb.Next(12) + } s.ID.Interface = native.Uint32(rb.Next(4)) s.ID.Cookie[0] = native.Uint32(rb.Next(4)) s.ID.Cookie[1] = native.Uint32(rb.Next(4)) @@ -104,31 +141,126 @@ func (s *Socket) deserialize(b []byte) error { return nil } +func (u *UnixSocket) deserialize(b []byte) error { + if len(b) < sizeofUnixSocket { + return fmt.Errorf("unix diag data short read (%d); want %d", len(b), sizeofUnixSocket) + } + rb := readBuffer{Bytes: b} + u.Type = rb.Read() + u.Family = rb.Read() + u.State = rb.Read() + u.pad = rb.Read() + u.INode = native.Uint32(rb.Next(4)) + u.Cookie[0] = native.Uint32(rb.Next(4)) + u.Cookie[1] = native.Uint32(rb.Next(4)) + return nil +} + +// SocketGet returns the Socket identified by its local and remote addresses. +func (h *Handle) SocketGet(local, remote net.Addr) (*Socket, error) { + var protocol uint8 + var localIP, remoteIP net.IP + var localPort, remotePort uint16 + switch l := local.(type) { + case *net.TCPAddr: + r, ok := remote.(*net.TCPAddr) + if !ok { + return nil, ErrNotImplemented + } + localIP = l.IP + localPort = uint16(l.Port) + remoteIP = r.IP + remotePort = uint16(r.Port) + protocol = unix.IPPROTO_TCP + case *net.UDPAddr: + r, ok := remote.(*net.UDPAddr) + if !ok { + return nil, ErrNotImplemented + } + localIP = l.IP + localPort = uint16(l.Port) + remoteIP = r.IP + remotePort = uint16(r.Port) + protocol = unix.IPPROTO_UDP + default: + return nil, ErrNotImplemented + } + + var family uint8 + if localIP.To4() != nil && remoteIP.To4() != nil { + family = unix.AF_INET + } + + if family == 0 && localIP.To16() != nil && remoteIP.To16() != nil { + family = unix.AF_INET6 + } + + if family == 0 { + return nil, ErrNotImplemented + } + + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: protocol, + States: 0xffffffff, + ID: SocketID{ + SourcePort: localPort, + DestinationPort: remotePort, + Source: localIP, + Destination: remoteIP, + Cookie: [2]uint32{nl.TCPDIAG_NOCOOKIE, nl.TCPDIAG_NOCOOKIE}, + }, + }) + + msgs, err := req.Execute(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY) + if err != nil { + return nil, err + } + if len(msgs) == 0 { + return nil, errors.New("no message nor error from netlink") + } + if len(msgs) > 2 { + return nil, fmt.Errorf("multiple (%d) matching sockets", len(msgs)) + } + + sock := &Socket{} + if err := sock.deserialize(msgs[0]); err != nil { + return nil, err + } + return sock, nil +} + // SocketGet returns the Socket identified by its local and remote addresses. func SocketGet(local, remote net.Addr) (*Socket, error) { + return pkgHandle.SocketGet(local, remote) +} + +// SocketDestroy kills the Socket identified by its local and remote addresses. +func (h *Handle) SocketDestroy(local, remote net.Addr) error { localTCP, ok := local.(*net.TCPAddr) if !ok { - return nil, ErrNotImplemented + return ErrNotImplemented } remoteTCP, ok := remote.(*net.TCPAddr) if !ok { - return nil, ErrNotImplemented + return ErrNotImplemented } localIP := localTCP.IP.To4() if localIP == nil { - return nil, ErrNotImplemented + return ErrNotImplemented } remoteIP := remoteTCP.IP.To4() if remoteIP == nil { - return nil, ErrNotImplemented + return ErrNotImplemented } s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) if err != nil { - return nil, err + return err } defer s.Close() - req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, 0) + req := h.newNetlinkRequest(nl.SOCK_DESTROY, unix.NLM_F_ACK) req.AddData(&socketRequest{ Family: unix.AF_INET, Protocol: unix.IPPROTO_TCP, @@ -140,23 +272,319 @@ func SocketGet(local, remote net.Addr) (*Socket, error) { Cookie: [2]uint32{nl.TCPDIAG_NOCOOKIE, nl.TCPDIAG_NOCOOKIE}, }, }) - s.Send(req) - msgs, from, err := s.Receive() + + _, err = req.Execute(unix.NETLINK_INET_DIAG, 0) + return err +} + +// SocketDestroy kills the Socket identified by its local and remote addresses. +func SocketDestroy(local, remote net.Addr) error { + return pkgHandle.SocketDestroy(local, remote) +} + +// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info. +func (h *Handle) SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_TCP, + Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)), + States: uint32(0xfff), // all states + }) + + // Do the query and parse the result + var result []*InetDiagTCPInfoResp + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &Socket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + var attrs []syscall.NetlinkRouteAttr + if attrs, err = nl.ParseRouteAttr(msg[sizeofSocket:]); err != nil { + return false + } + + var res *InetDiagTCPInfoResp + if res, err = attrsToInetDiagTCPInfoResp(attrs, sockInfo); err != nil { + return false + } + + result = append(result, res) + return true + }) + + if err != nil { + return nil, err + } + return result, nil +} + +// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info. +func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) { + return pkgHandle.SocketDiagTCPInfo(family) +} + +// SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket. +func (h *Handle) SocketDiagTCP(family uint8) ([]*Socket, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_TCP, + Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)), + States: uint32(0xfff), // all states + }) + + // Do the query and parse the result + var result []*Socket + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &Socket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + result = append(result, sockInfo) + return true + }) if err != nil { return nil, err } - if from.Pid != nl.PidKernel { - return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + return result, nil +} + +// SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket. +func SocketDiagTCP(family uint8) ([]*Socket, error) { + return pkgHandle.SocketDiagTCP(family) +} + +// SocketDiagUDPInfo requests INET_DIAG_INFO for UDP protocol for specified family type and return with extension info. +func (h *Handle) SocketDiagUDPInfo(family uint8) ([]*InetDiagUDPInfoResp, error) { + // Construct the request + var extensions uint8 + extensions = 1 << (INET_DIAG_VEGASINFO - 1) + extensions |= 1 << (INET_DIAG_INFO - 1) + extensions |= 1 << (INET_DIAG_MEMINFO - 1) + + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_UDP, + Ext: extensions, + States: uint32(0xfff), // all states + }) + + // Do the query and parse the result + var result []*InetDiagUDPInfoResp + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &Socket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + + var attrs []syscall.NetlinkRouteAttr + if attrs, err = nl.ParseRouteAttr(msg[sizeofSocket:]); err != nil { + return false + } + + var res *InetDiagUDPInfoResp + if res, err = attrsToInetDiagUDPInfoResp(attrs, sockInfo); err != nil { + return false + } + + result = append(result, res) + return true + }) + if err != nil { + return nil, err } - if len(msgs) == 0 { - return nil, errors.New("no message nor error from netlink") + return result, nil +} + +// SocketDiagUDPInfo requests INET_DIAG_INFO for UDP protocol for specified family type and return with extension info. +func SocketDiagUDPInfo(family uint8) ([]*InetDiagUDPInfoResp, error) { + return pkgHandle.SocketDiagUDPInfo(family) +} + +// SocketDiagUDP requests INET_DIAG_INFO for UDP protocol for specified family type and return related socket. +func (h *Handle) SocketDiagUDP(family uint8) ([]*Socket, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_UDP, + Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)), + States: uint32(0xfff), // all states + }) + + // Do the query and parse the result + var result []*Socket + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &Socket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + result = append(result, sockInfo) + return true + }) + if err != nil { + return nil, err } - if len(msgs) > 2 { - return nil, fmt.Errorf("multiple (%d) matching sockets", len(msgs)) + return result, nil +} + +// SocketDiagUDP requests INET_DIAG_INFO for UDP protocol for specified family type and return related socket. +func SocketDiagUDP(family uint8) ([]*Socket, error) { + return pkgHandle.SocketDiagUDP(family) +} + +// UnixSocketDiagInfo requests UNIX_DIAG_INFO for unix sockets and return with extension info. +func (h *Handle) UnixSocketDiagInfo() ([]*UnixDiagInfoResp, error) { + // Construct the request + var extensions uint8 + extensions = 1 << UNIX_DIAG_NAME + extensions |= 1 << UNIX_DIAG_PEER + extensions |= 1 << UNIX_DIAG_RQLEN + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&unixSocketRequest{ + Family: unix.AF_UNIX, + States: ^uint32(0), // all states + Show: uint32(extensions), + }) + + var result []*UnixDiagInfoResp + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &UnixSocket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + + // Diagnosis also delivers sockets with AF_INET family, filter those + if sockInfo.Family != unix.AF_UNIX { + return false + } + + var attrs []syscall.NetlinkRouteAttr + if attrs, err = nl.ParseRouteAttr(msg[sizeofSocket:]); err != nil { + return false + } + + var res *UnixDiagInfoResp + if res, err = attrsToUnixDiagInfoResp(attrs, sockInfo); err != nil { + return false + } + result = append(result, res) + return true + }) + if err != nil { + return nil, err } - sock := &Socket{} - if err := sock.deserialize(msgs[0].Data); err != nil { + return result, nil +} + +// UnixSocketDiagInfo requests UNIX_DIAG_INFO for unix sockets and return with extension info. +func UnixSocketDiagInfo() ([]*UnixDiagInfoResp, error) { + return pkgHandle.UnixSocketDiagInfo() +} + +// UnixSocketDiag requests UNIX_DIAG_INFO for unix sockets. +func (h *Handle) UnixSocketDiag() ([]*UnixSocket, error) { + // Construct the request + req := h.newNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&unixSocketRequest{ + Family: unix.AF_UNIX, + States: ^uint32(0), // all states + }) + + var result []*UnixSocket + var err error + err = req.ExecuteIter(unix.NETLINK_INET_DIAG, nl.SOCK_DIAG_BY_FAMILY, func(msg []byte) bool { + sockInfo := &UnixSocket{} + if err = sockInfo.deserialize(msg); err != nil { + return false + } + + // Diagnosis also delivers sockets with AF_INET family, filter those + if sockInfo.Family == unix.AF_UNIX { + result = append(result, sockInfo) + } + return true + }) + if err != nil { return nil, err } - return sock, nil + return result, nil +} + +// UnixSocketDiag requests UNIX_DIAG_INFO for unix sockets. +func UnixSocketDiag() ([]*UnixSocket, error) { + return pkgHandle.UnixSocketDiag() +} + +func attrsToInetDiagTCPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagTCPInfoResp, error) { + info := &InetDiagTCPInfoResp{ + InetDiagMsg: sockInfo, + } + for _, a := range attrs { + switch a.Attr.Type { + case INET_DIAG_INFO: + info.TCPInfo = &TCPInfo{} + if err := info.TCPInfo.deserialize(a.Value); err != nil { + return nil, err + } + case INET_DIAG_BBRINFO: + info.TCPBBRInfo = &TCPBBRInfo{} + if err := info.TCPBBRInfo.deserialize(a.Value); err != nil { + return nil, err + } + } + } + + return info, nil +} + +func attrsToInetDiagUDPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagUDPInfoResp, error) { + info := &InetDiagUDPInfoResp{ + InetDiagMsg: sockInfo, + } + for _, a := range attrs { + switch a.Attr.Type { + case INET_DIAG_MEMINFO: + info.Memory = &MemInfo{} + if err := info.Memory.deserialize(a.Value); err != nil { + return nil, err + } + } + } + + return info, nil +} + +func attrsToUnixDiagInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *UnixSocket) (*UnixDiagInfoResp, error) { + info := &UnixDiagInfoResp{ + DiagMsg: sockInfo, + } + for _, a := range attrs { + switch a.Attr.Type { + case UNIX_DIAG_NAME: + name := string(a.Value[:a.Attr.Len]) + info.Name = &name + case UNIX_DIAG_PEER: + peer := native.Uint32(a.Value) + info.Peer = &peer + case UNIX_DIAG_RQLEN: + info.Queue = &QueueInfo{ + RQueue: native.Uint32(a.Value[:4]), + WQueue: native.Uint32(a.Value[4:]), + } + // default: + // fmt.Println("unknown unix attribute type", a.Attr.Type, "with data", a.Value) + } + } + + return info, nil } diff --git a/vendor/github.com/vishvananda/netlink/socket_xdp_linux.go b/vendor/github.com/vishvananda/netlink/socket_xdp_linux.go new file mode 100644 index 00000000000..20c82f9c766 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/socket_xdp_linux.go @@ -0,0 +1,195 @@ +package netlink + +import ( + "errors" + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +const ( + sizeofXDPSocketRequest = 1 + 1 + 2 + 4 + 4 + 2*4 + sizeofXDPSocket = 0x10 +) + +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L12 +type xdpSocketRequest struct { + Family uint8 + Protocol uint8 + pad uint16 + Ino uint32 + Show uint32 + Cookie [2]uint32 +} + +func (r *xdpSocketRequest) Serialize() []byte { + b := writeBuffer{Bytes: make([]byte, sizeofSocketRequest)} + b.Write(r.Family) + b.Write(r.Protocol) + native.PutUint16(b.Next(2), r.pad) + native.PutUint32(b.Next(4), r.Ino) + native.PutUint32(b.Next(4), r.Show) + native.PutUint32(b.Next(4), r.Cookie[0]) + native.PutUint32(b.Next(4), r.Cookie[1]) + return b.Bytes +} + +func (r *xdpSocketRequest) Len() int { return sizeofXDPSocketRequest } + +func (s *XDPSocket) deserialize(b []byte) error { + if len(b) < sizeofXDPSocket { + return fmt.Errorf("XDP socket data short read (%d); want %d", len(b), sizeofXDPSocket) + } + rb := readBuffer{Bytes: b} + s.Family = rb.Read() + s.Type = rb.Read() + s.pad = native.Uint16(rb.Next(2)) + s.Ino = native.Uint32(rb.Next(4)) + s.Cookie[0] = native.Uint32(rb.Next(4)) + s.Cookie[1] = native.Uint32(rb.Next(4)) + return nil +} + +// XDPSocketGet returns the XDP socket identified by its inode number and/or +// socket cookie. Specify the cookie as SOCK_ANY_COOKIE if +func SocketXDPGetInfo(ino uint32, cookie uint64) (*XDPDiagInfoResp, error) { + // We have a problem here: dumping AF_XDP sockets currently does not support + // filtering. We thus need to dump all XSKs and then only filter afterwards + // :( + xsks, err := SocketDiagXDP() + if err != nil { + return nil, err + } + checkCookie := cookie != SOCK_ANY_COOKIE && cookie != 0 + crumblingCookie := [2]uint32{uint32(cookie), uint32(cookie >> 32)} + checkIno := ino != 0 + var xskinfo *XDPDiagInfoResp + for _, xsk := range xsks { + if checkIno && xsk.XDPDiagMsg.Ino != ino { + continue + } + if checkCookie && xsk.XDPDiagMsg.Cookie != crumblingCookie { + continue + } + if xskinfo != nil { + return nil, errors.New("multiple matching XDP sockets") + } + xskinfo = xsk + } + if xskinfo == nil { + return nil, errors.New("no matching XDP socket") + } + return xskinfo, nil +} + +// SocketDiagXDP requests XDP_DIAG_INFO for XDP family sockets. +func SocketDiagXDP() ([]*XDPDiagInfoResp, error) { + var result []*XDPDiagInfoResp + err := socketDiagXDPExecutor(func(m syscall.NetlinkMessage) error { + sockInfo := &XDPSocket{} + if err := sockInfo.deserialize(m.Data); err != nil { + return err + } + attrs, err := nl.ParseRouteAttr(m.Data[sizeofXDPSocket:]) + if err != nil { + return err + } + + res, err := attrsToXDPDiagInfoResp(attrs, sockInfo) + if err != nil { + return err + } + + result = append(result, res) + return nil + }) + if err != nil { + return nil, err + } + return result, nil +} + +// socketDiagXDPExecutor requests XDP_DIAG_INFO for XDP family sockets. +func socketDiagXDPExecutor(receiver func(syscall.NetlinkMessage) error) error { + s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) + if err != nil { + return err + } + defer s.Close() + + req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&xdpSocketRequest{ + Family: unix.AF_XDP, + Show: XDP_SHOW_INFO | XDP_SHOW_RING_CFG | XDP_SHOW_UMEM | XDP_SHOW_STATS, + }) + if err := s.Send(req); err != nil { + return err + } + +loop: + for { + msgs, from, err := s.Receive() + if err != nil { + return err + } + if from.Pid != nl.PidKernel { + return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + } + if len(msgs) == 0 { + return errors.New("no message nor error from netlink") + } + + for _, m := range msgs { + switch m.Header.Type { + case unix.NLMSG_DONE: + break loop + case unix.NLMSG_ERROR: + error := int32(native.Uint32(m.Data[0:4])) + return syscall.Errno(-error) + } + if err := receiver(m); err != nil { + return err + } + } + } + return nil +} + +func attrsToXDPDiagInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *XDPSocket) (*XDPDiagInfoResp, error) { + resp := &XDPDiagInfoResp{ + XDPDiagMsg: sockInfo, + XDPInfo: &XDPInfo{}, + } + for _, a := range attrs { + switch a.Attr.Type { + case XDP_DIAG_INFO: + resp.XDPInfo.Ifindex = native.Uint32(a.Value[0:4]) + resp.XDPInfo.QueueID = native.Uint32(a.Value[4:8]) + case XDP_DIAG_UID: + resp.XDPInfo.UID = native.Uint32(a.Value[0:4]) + case XDP_DIAG_RX_RING: + resp.XDPInfo.RxRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_TX_RING: + resp.XDPInfo.TxRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_UMEM_FILL_RING: + resp.XDPInfo.UmemFillRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_UMEM_COMPLETION_RING: + resp.XDPInfo.UmemCompletionRingEntries = native.Uint32(a.Value[0:4]) + case XDP_DIAG_UMEM: + umem := &XDPDiagUmem{} + if err := umem.deserialize(a.Value); err != nil { + return nil, err + } + resp.XDPInfo.Umem = umem + case XDP_DIAG_STATS: + stats := &XDPDiagStats{} + if err := stats.deserialize(a.Value); err != nil { + return nil, err + } + resp.XDPInfo.Stats = stats + } + } + return resp, nil +} diff --git a/vendor/github.com/vishvananda/netlink/tcp.go b/vendor/github.com/vishvananda/netlink/tcp.go new file mode 100644 index 00000000000..43f80a0fca7 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/tcp.go @@ -0,0 +1,92 @@ +package netlink + +// TCP States +const ( + TCP_ESTABLISHED = iota + 0x01 + TCP_SYN_SENT + TCP_SYN_RECV + TCP_FIN_WAIT1 + TCP_FIN_WAIT2 + TCP_TIME_WAIT + TCP_CLOSE + TCP_CLOSE_WAIT + TCP_LAST_ACK + TCP_LISTEN + TCP_CLOSING + TCP_NEW_SYN_REC + TCP_MAX_STATES +) + +type TCPInfo struct { + State uint8 + Ca_state uint8 + Retransmits uint8 + Probes uint8 + Backoff uint8 + Options uint8 + Snd_wscale uint8 // no uint4 + Rcv_wscale uint8 + Delivery_rate_app_limited uint8 + Fastopen_client_fail uint8 + Rto uint32 + Ato uint32 + Snd_mss uint32 + Rcv_mss uint32 + Unacked uint32 + Sacked uint32 + Lost uint32 + Retrans uint32 + Fackets uint32 + Last_data_sent uint32 + Last_ack_sent uint32 + Last_data_recv uint32 + Last_ack_recv uint32 + Pmtu uint32 + Rcv_ssthresh uint32 + Rtt uint32 + Rttvar uint32 + Snd_ssthresh uint32 + Snd_cwnd uint32 + Advmss uint32 + Reordering uint32 + Rcv_rtt uint32 + Rcv_space uint32 + Total_retrans uint32 + Pacing_rate uint64 + Max_pacing_rate uint64 + Bytes_acked uint64 /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + Bytes_received uint64 /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ + Segs_out uint32 /* RFC4898 tcpEStatsPerfSegsOut */ + Segs_in uint32 /* RFC4898 tcpEStatsPerfSegsIn */ + Notsent_bytes uint32 + Min_rtt uint32 + Data_segs_in uint32 /* RFC4898 tcpEStatsDataSegsIn */ + Data_segs_out uint32 /* RFC4898 tcpEStatsDataSegsOut */ + Delivery_rate uint64 + Busy_time uint64 /* Time (usec) busy sending data */ + Rwnd_limited uint64 /* Time (usec) limited by receive window */ + Sndbuf_limited uint64 /* Time (usec) limited by send buffer */ + Delivered uint32 + Delivered_ce uint32 + Bytes_sent uint64 /* RFC4898 tcpEStatsPerfHCDataOctetsOut */ + Bytes_retrans uint64 /* RFC4898 tcpEStatsPerfOctetsRetrans */ + Dsack_dups uint32 /* RFC4898 tcpEStatsStackDSACKDups */ + Reord_seen uint32 /* reordering events seen */ + Rcv_ooopack uint32 /* Out-of-order packets received */ + Snd_wnd uint32 /* peer's advertised receive window after * scaling (bytes) */ +} + +type TCPBBRInfo struct { + BBRBW uint64 + BBRMinRTT uint32 + BBRPacingGain uint32 + BBRCwndGain uint32 +} + +// According to https://man7.org/linux/man-pages/man7/sock_diag.7.html +type MemInfo struct { + RMem uint32 + WMem uint32 + FMem uint32 + TMem uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/tcp_linux.go b/vendor/github.com/vishvananda/netlink/tcp_linux.go new file mode 100644 index 00000000000..e98036da55b --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/tcp_linux.go @@ -0,0 +1,368 @@ +package netlink + +import ( + "bytes" + "errors" + "io" +) + +const ( + tcpBBRInfoLen = 20 + memInfoLen = 16 +) + +func checkDeserErr(err error) error { + if err == io.EOF { + return nil + } + return err +} + +func (t *TCPInfo) deserialize(b []byte) error { + var err error + rb := bytes.NewBuffer(b) + + t.State, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Ca_state, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Retransmits, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Probes, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Backoff, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Options, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + scales, err := rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Snd_wscale = scales >> 4 // first 4 bits + t.Rcv_wscale = scales & 0xf // last 4 bits + + rateLimAndFastOpen, err := rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Delivery_rate_app_limited = rateLimAndFastOpen >> 7 // get first bit + t.Fastopen_client_fail = rateLimAndFastOpen >> 5 & 3 // get next two bits + + next := rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rto = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Ato = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_mss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_mss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Unacked = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Sacked = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Lost = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Retrans = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Fackets = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_data_sent = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_ack_sent = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_data_recv = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_ack_recv = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Pmtu = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_ssthresh = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rtt = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rttvar = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_ssthresh = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_cwnd = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Advmss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Reordering = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_rtt = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_space = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Total_retrans = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Pacing_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Max_pacing_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_acked = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_received = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Segs_out = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Segs_in = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Notsent_bytes = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Min_rtt = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Data_segs_in = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Data_segs_out = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Delivery_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Busy_time = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Rwnd_limited = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Sndbuf_limited = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Delivered = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Delivered_ce = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_sent = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_retrans = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Dsack_dups = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Reord_seen = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_ooopack = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_wnd = native.Uint32(next) + return nil +} + +func (t *TCPBBRInfo) deserialize(b []byte) error { + if len(b) != tcpBBRInfoLen { + return errors.New("Invalid length") + } + + rb := bytes.NewBuffer(b) + t.BBRBW = native.Uint64(rb.Next(8)) + t.BBRMinRTT = native.Uint32(rb.Next(4)) + t.BBRPacingGain = native.Uint32(rb.Next(4)) + t.BBRCwndGain = native.Uint32(rb.Next(4)) + + return nil +} + +func (m *MemInfo) deserialize(b []byte) error { + if len(b) != memInfoLen { + return errors.New("Invalid length") + } + + rb := bytes.NewBuffer(b) + m.RMem = native.Uint32(rb.Next(4)) + m.WMem = native.Uint32(rb.Next(4)) + m.FMem = native.Uint32(rb.Next(4)) + m.TMem = native.Uint32(rb.Next(4)) + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/unix_diag.go b/vendor/github.com/vishvananda/netlink/unix_diag.go new file mode 100644 index 00000000000..d81776f36ef --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/unix_diag.go @@ -0,0 +1,27 @@ +package netlink + +// According to linux/include/uapi/linux/unix_diag.h +const ( + UNIX_DIAG_NAME = iota + UNIX_DIAG_VFS + UNIX_DIAG_PEER + UNIX_DIAG_ICONS + UNIX_DIAG_RQLEN + UNIX_DIAG_MEMINFO + UNIX_DIAG_SHUTDOWN + UNIX_DIAG_UID + UNIX_DIAG_MAX +) + +type UnixDiagInfoResp struct { + DiagMsg *UnixSocket + Name *string + Peer *uint32 + Queue *QueueInfo + Shutdown *uint8 +} + +type QueueInfo struct { + RQueue uint32 + WQueue uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/vdpa_linux.go b/vendor/github.com/vishvananda/netlink/vdpa_linux.go new file mode 100644 index 00000000000..7c15986d0f9 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/vdpa_linux.go @@ -0,0 +1,463 @@ +package netlink + +import ( + "fmt" + "net" + "syscall" + + "golang.org/x/sys/unix" + + "github.com/vishvananda/netlink/nl" +) + +type vdpaDevID struct { + Name string + ID uint32 +} + +// VDPADev contains info about VDPA device +type VDPADev struct { + vdpaDevID + VendorID uint32 + MaxVQS uint32 + MaxVQSize uint16 + MinVQSize uint16 +} + +// VDPADevConfig contains configuration of the VDPA device +type VDPADevConfig struct { + vdpaDevID + Features uint64 + NegotiatedFeatures uint64 + Net VDPADevConfigNet +} + +// VDPADevVStats conatins vStats for the VDPA device +type VDPADevVStats struct { + vdpaDevID + QueueIndex uint32 + Vendor []VDPADevVStatsVendor + NegotiatedFeatures uint64 +} + +// VDPADevVStatsVendor conatins name and value for vendor specific vstat option +type VDPADevVStatsVendor struct { + Name string + Value uint64 +} + +// VDPADevConfigNet conatins status and net config for the VDPA device +type VDPADevConfigNet struct { + Status VDPADevConfigNetStatus + Cfg VDPADevConfigNetCfg +} + +// VDPADevConfigNetStatus contains info about net status +type VDPADevConfigNetStatus struct { + LinkUp bool + Announce bool +} + +// VDPADevConfigNetCfg contains net config for the VDPA device +type VDPADevConfigNetCfg struct { + MACAddr net.HardwareAddr + MaxVQP uint16 + MTU uint16 +} + +// VDPAMGMTDev conatins info about VDPA management device +type VDPAMGMTDev struct { + BusName string + DevName string + SupportedClasses uint64 + SupportedFeatures uint64 + MaxVQS uint32 +} + +// VDPANewDevParams contains parameters for new VDPA device +// use SetBits to configure requried features for the device +// example: +// +// VDPANewDevParams{Features: SetBits(0, VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_MAC_ADDR)} +type VDPANewDevParams struct { + MACAddr net.HardwareAddr + MaxVQP uint16 + MTU uint16 + Features uint64 +} + +// SetBits set provided bits in the uint64 input value +// usage example: +// features := SetBits(0, VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_MAC_ADDR) +func SetBits(input uint64, pos ...int) uint64 { + for _, p := range pos { + input |= 1 << uint64(p) + } + return input +} + +// IsBitSet check if specific bit is set in the uint64 input value +// usage example: +// hasNetClass := IsBitSet(mgmtDev, VIRTIO_ID_NET) +func IsBitSet(input uint64, pos int) bool { + val := input & (1 << uint64(pos)) + return val > 0 +} + +// VDPANewDev adds new VDPA device +// Equivalent to: `vdpa dev add name mgmtdev /mgmtName [params]` +func VDPANewDev(name, mgmtBus, mgmtName string, params VDPANewDevParams) error { + return pkgHandle.VDPANewDev(name, mgmtBus, mgmtName, params) +} + +// VDPADelDev removes VDPA device +// Equivalent to: `vdpa dev del ` +func VDPADelDev(name string) error { + return pkgHandle.VDPADelDev(name) +} + +// VDPAGetDevList returns list of VDPA devices +// Equivalent to: `vdpa dev show` +func VDPAGetDevList() ([]*VDPADev, error) { + return pkgHandle.VDPAGetDevList() +} + +// VDPAGetDevByName returns VDPA device selected by name +// Equivalent to: `vdpa dev show ` +func VDPAGetDevByName(name string) (*VDPADev, error) { + return pkgHandle.VDPAGetDevByName(name) +} + +// VDPAGetDevConfigList returns list of VDPA devices configurations +// Equivalent to: `vdpa dev config show` +func VDPAGetDevConfigList() ([]*VDPADevConfig, error) { + return pkgHandle.VDPAGetDevConfigList() +} + +// VDPAGetDevConfigByName returns VDPA device configuration selected by name +// Equivalent to: `vdpa dev config show ` +func VDPAGetDevConfigByName(name string) (*VDPADevConfig, error) { + return pkgHandle.VDPAGetDevConfigByName(name) +} + +// VDPAGetDevVStats returns vstats for VDPA device +// Equivalent to: `vdpa dev vstats show qidx ` +func VDPAGetDevVStats(name string, queueIndex uint32) (*VDPADevVStats, error) { + return pkgHandle.VDPAGetDevVStats(name, queueIndex) +} + +// VDPAGetMGMTDevList returns list of mgmt devices +// Equivalent to: `vdpa mgmtdev show` +func VDPAGetMGMTDevList() ([]*VDPAMGMTDev, error) { + return pkgHandle.VDPAGetMGMTDevList() +} + +// VDPAGetMGMTDevByBusAndName returns mgmt devices selected by bus and name +// Equivalent to: `vdpa mgmtdev show /` +func VDPAGetMGMTDevByBusAndName(bus, name string) (*VDPAMGMTDev, error) { + return pkgHandle.VDPAGetMGMTDevByBusAndName(bus, name) +} + +type vdpaNetlinkMessage []syscall.NetlinkRouteAttr + +func (id *vdpaDevID) parseIDAttribute(attr syscall.NetlinkRouteAttr) { + switch attr.Attr.Type { + case nl.VDPA_ATTR_DEV_NAME: + id.Name = nl.BytesToString(attr.Value) + case nl.VDPA_ATTR_DEV_ID: + id.ID = native.Uint32(attr.Value) + } +} + +func (netStatus *VDPADevConfigNetStatus) parseStatusAttribute(value []byte) { + a := native.Uint16(value) + netStatus.Announce = (a & VIRTIO_NET_S_ANNOUNCE) > 0 + netStatus.LinkUp = (a & VIRTIO_NET_S_LINK_UP) > 0 +} + +func (d *VDPADev) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + d.parseIDAttribute(a) + switch a.Attr.Type { + case nl.VDPA_ATTR_DEV_VENDOR_ID: + d.VendorID = native.Uint32(a.Value) + case nl.VDPA_ATTR_DEV_MAX_VQS: + d.MaxVQS = native.Uint32(a.Value) + case nl.VDPA_ATTR_DEV_MAX_VQ_SIZE: + d.MaxVQSize = native.Uint16(a.Value) + case nl.VDPA_ATTR_DEV_MIN_VQ_SIZE: + d.MinVQSize = native.Uint16(a.Value) + } + } +} + +func (c *VDPADevConfig) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + c.parseIDAttribute(a) + switch a.Attr.Type { + case nl.VDPA_ATTR_DEV_NET_CFG_MACADDR: + c.Net.Cfg.MACAddr = a.Value + case nl.VDPA_ATTR_DEV_NET_STATUS: + c.Net.Status.parseStatusAttribute(a.Value) + case nl.VDPA_ATTR_DEV_NET_CFG_MAX_VQP: + c.Net.Cfg.MaxVQP = native.Uint16(a.Value) + case nl.VDPA_ATTR_DEV_NET_CFG_MTU: + c.Net.Cfg.MTU = native.Uint16(a.Value) + case nl.VDPA_ATTR_DEV_FEATURES: + c.Features = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_NEGOTIATED_FEATURES: + c.NegotiatedFeatures = native.Uint64(a.Value) + } + } +} + +func (s *VDPADevVStats) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + s.parseIDAttribute(a) + switch a.Attr.Type { + case nl.VDPA_ATTR_DEV_QUEUE_INDEX: + s.QueueIndex = native.Uint32(a.Value) + case nl.VDPA_ATTR_DEV_VENDOR_ATTR_NAME: + s.Vendor = append(s.Vendor, VDPADevVStatsVendor{Name: nl.BytesToString(a.Value)}) + case nl.VDPA_ATTR_DEV_VENDOR_ATTR_VALUE: + if len(s.Vendor) == 0 { + break + } + s.Vendor[len(s.Vendor)-1].Value = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_NEGOTIATED_FEATURES: + s.NegotiatedFeatures = native.Uint64(a.Value) + } + } +} + +func (d *VDPAMGMTDev) parseAttributes(attrs vdpaNetlinkMessage) { + for _, a := range attrs { + switch a.Attr.Type { + case nl.VDPA_ATTR_MGMTDEV_BUS_NAME: + d.BusName = nl.BytesToString(a.Value) + case nl.VDPA_ATTR_MGMTDEV_DEV_NAME: + d.DevName = nl.BytesToString(a.Value) + case nl.VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES: + d.SupportedClasses = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_SUPPORTED_FEATURES: + d.SupportedFeatures = native.Uint64(a.Value) + case nl.VDPA_ATTR_DEV_MGMTDEV_MAX_VQS: + d.MaxVQS = native.Uint32(a.Value) + } + } +} + +func (h *Handle) vdpaRequest(command uint8, extraFlags int, attrs []*nl.RtAttr) ([]vdpaNetlinkMessage, error) { + f, err := h.GenlFamilyGet(nl.VDPA_GENL_NAME) + if err != nil { + return nil, err + } + req := h.newNetlinkRequest(int(f.ID), unix.NLM_F_ACK|extraFlags) + req.AddData(&nl.Genlmsg{ + Command: command, + Version: nl.VDPA_GENL_VERSION, + }) + for _, a := range attrs { + req.AddData(a) + } + + resp, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + messages := make([]vdpaNetlinkMessage, 0, len(resp)) + for _, m := range resp { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + messages = append(messages, attrs) + } + return messages, nil +} + +// dump all devices if dev is nil +func (h *Handle) vdpaDevGet(dev *string) ([]*VDPADev, error) { + var extraFlags int + var attrs []*nl.RtAttr + if dev != nil { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(*dev))) + } else { + extraFlags = extraFlags | unix.NLM_F_DUMP + } + messages, err := h.vdpaRequest(nl.VDPA_CMD_DEV_GET, extraFlags, attrs) + if err != nil { + return nil, err + } + devs := make([]*VDPADev, 0, len(messages)) + for _, m := range messages { + d := &VDPADev{} + d.parseAttributes(m) + devs = append(devs, d) + } + return devs, nil +} + +// dump all devices if dev is nil +func (h *Handle) vdpaDevConfigGet(dev *string) ([]*VDPADevConfig, error) { + var extraFlags int + var attrs []*nl.RtAttr + if dev != nil { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(*dev))) + } else { + extraFlags = extraFlags | unix.NLM_F_DUMP + } + messages, err := h.vdpaRequest(nl.VDPA_CMD_DEV_CONFIG_GET, extraFlags, attrs) + if err != nil { + return nil, err + } + cfgs := make([]*VDPADevConfig, 0, len(messages)) + for _, m := range messages { + cfg := &VDPADevConfig{} + cfg.parseAttributes(m) + cfgs = append(cfgs, cfg) + } + return cfgs, nil +} + +// dump all devices if dev is nil +func (h *Handle) vdpaMGMTDevGet(bus, dev *string) ([]*VDPAMGMTDev, error) { + var extraFlags int + var attrs []*nl.RtAttr + if dev != nil { + attrs = append(attrs, + nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_DEV_NAME, nl.ZeroTerminated(*dev)), + ) + if bus != nil { + attrs = append(attrs, + nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_BUS_NAME, nl.ZeroTerminated(*bus)), + ) + } + } else { + extraFlags = extraFlags | unix.NLM_F_DUMP + } + messages, err := h.vdpaRequest(nl.VDPA_CMD_MGMTDEV_GET, extraFlags, attrs) + if err != nil { + return nil, err + } + cfgs := make([]*VDPAMGMTDev, 0, len(messages)) + for _, m := range messages { + cfg := &VDPAMGMTDev{} + cfg.parseAttributes(m) + cfgs = append(cfgs, cfg) + } + return cfgs, nil +} + +// VDPANewDev adds new VDPA device +// Equivalent to: `vdpa dev add name mgmtdev /mgmtName [params]` +func (h *Handle) VDPANewDev(name, mgmtBus, mgmtName string, params VDPANewDevParams) error { + attrs := []*nl.RtAttr{ + nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name)), + nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_DEV_NAME, nl.ZeroTerminated(mgmtName)), + } + if mgmtBus != "" { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_MGMTDEV_BUS_NAME, nl.ZeroTerminated(mgmtBus))) + } + if len(params.MACAddr) != 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MACADDR, params.MACAddr)) + } + if params.MaxVQP > 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MAX_VQP, nl.Uint16Attr(params.MaxVQP))) + } + if params.MTU > 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_NET_CFG_MTU, nl.Uint16Attr(params.MTU))) + } + if params.Features > 0 { + attrs = append(attrs, nl.NewRtAttr(nl.VDPA_ATTR_DEV_FEATURES, nl.Uint64Attr(params.Features))) + } + _, err := h.vdpaRequest(nl.VDPA_CMD_DEV_NEW, 0, attrs) + return err +} + +// VDPADelDev removes VDPA device +// Equivalent to: `vdpa dev del ` +func (h *Handle) VDPADelDev(name string) error { + _, err := h.vdpaRequest(nl.VDPA_CMD_DEV_DEL, 0, []*nl.RtAttr{ + nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name))}) + return err +} + +// VDPAGetDevList returns list of VDPA devices +// Equivalent to: `vdpa dev show` +func (h *Handle) VDPAGetDevList() ([]*VDPADev, error) { + return h.vdpaDevGet(nil) +} + +// VDPAGetDevByName returns VDPA device selected by name +// Equivalent to: `vdpa dev show ` +func (h *Handle) VDPAGetDevByName(name string) (*VDPADev, error) { + devs, err := h.vdpaDevGet(&name) + if err != nil { + return nil, err + } + if len(devs) == 0 { + return nil, fmt.Errorf("device not found") + } + return devs[0], nil +} + +// VDPAGetDevConfigList returns list of VDPA devices configurations +// Equivalent to: `vdpa dev config show` +func (h *Handle) VDPAGetDevConfigList() ([]*VDPADevConfig, error) { + return h.vdpaDevConfigGet(nil) +} + +// VDPAGetDevConfigByName returns VDPA device configuration selected by name +// Equivalent to: `vdpa dev config show ` +func (h *Handle) VDPAGetDevConfigByName(name string) (*VDPADevConfig, error) { + cfgs, err := h.vdpaDevConfigGet(&name) + if err != nil { + return nil, err + } + if len(cfgs) == 0 { + return nil, fmt.Errorf("configuration not found") + } + return cfgs[0], nil +} + +// VDPAGetDevVStats returns vstats for VDPA device +// Equivalent to: `vdpa dev vstats show qidx ` +func (h *Handle) VDPAGetDevVStats(name string, queueIndex uint32) (*VDPADevVStats, error) { + messages, err := h.vdpaRequest(nl.VDPA_CMD_DEV_VSTATS_GET, 0, []*nl.RtAttr{ + nl.NewRtAttr(nl.VDPA_ATTR_DEV_NAME, nl.ZeroTerminated(name)), + nl.NewRtAttr(nl.VDPA_ATTR_DEV_QUEUE_INDEX, nl.Uint32Attr(queueIndex)), + }) + if err != nil { + return nil, err + } + if len(messages) == 0 { + return nil, fmt.Errorf("stats not found") + } + stats := &VDPADevVStats{} + stats.parseAttributes(messages[0]) + return stats, nil +} + +// VDPAGetMGMTDevList returns list of mgmt devices +// Equivalent to: `vdpa mgmtdev show` +func (h *Handle) VDPAGetMGMTDevList() ([]*VDPAMGMTDev, error) { + return h.vdpaMGMTDevGet(nil, nil) +} + +// VDPAGetMGMTDevByBusAndName returns mgmt devices selected by bus and name +// Equivalent to: `vdpa mgmtdev show /` +func (h *Handle) VDPAGetMGMTDevByBusAndName(bus, name string) (*VDPAMGMTDev, error) { + var busPtr *string + if bus != "" { + busPtr = &bus + } + devs, err := h.vdpaMGMTDevGet(busPtr, &name) + if err != nil { + return nil, err + } + if len(devs) == 0 { + return nil, fmt.Errorf("mgmtdev not found") + } + return devs[0], nil +} diff --git a/vendor/github.com/vishvananda/netlink/virtio.go b/vendor/github.com/vishvananda/netlink/virtio.go new file mode 100644 index 00000000000..78a497bbc3b --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/virtio.go @@ -0,0 +1,132 @@ +package netlink + +// features for virtio net +const ( + VIRTIO_NET_F_CSUM = 0 // Host handles pkts w/ partial csum + VIRTIO_NET_F_GUEST_CSUM = 1 // Guest handles pkts w/ partial csum + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS = 2 // Dynamic offload configuration. + VIRTIO_NET_F_MTU = 3 // Initial MTU advice + VIRTIO_NET_F_MAC = 5 // Host has given MAC address. + VIRTIO_NET_F_GUEST_TSO4 = 7 // Guest can handle TSOv4 in. + VIRTIO_NET_F_GUEST_TSO6 = 8 // Guest can handle TSOv6 in. + VIRTIO_NET_F_GUEST_ECN = 9 // Guest can handle TSO[6] w/ ECN in. + VIRTIO_NET_F_GUEST_UFO = 10 // Guest can handle UFO in. + VIRTIO_NET_F_HOST_TSO4 = 11 // Host can handle TSOv4 in. + VIRTIO_NET_F_HOST_TSO6 = 12 // Host can handle TSOv6 in. + VIRTIO_NET_F_HOST_ECN = 13 // Host can handle TSO[6] w/ ECN in. + VIRTIO_NET_F_HOST_UFO = 14 // Host can handle UFO in. + VIRTIO_NET_F_MRG_RXBUF = 15 // Host can merge receive buffers. + VIRTIO_NET_F_STATUS = 16 // virtio_net_config.status available + VIRTIO_NET_F_CTRL_VQ = 17 // Control channel available + VIRTIO_NET_F_CTRL_RX = 18 // Control channel RX mode support + VIRTIO_NET_F_CTRL_VLAN = 19 // Control channel VLAN filtering + VIRTIO_NET_F_CTRL_RX_EXTRA = 20 // Extra RX mode control support + VIRTIO_NET_F_GUEST_ANNOUNCE = 21 // Guest can announce device on the* network + VIRTIO_NET_F_MQ = 22 // Device supports Receive Flow Steering + VIRTIO_NET_F_CTRL_MAC_ADDR = 23 // Set MAC address + VIRTIO_NET_F_VQ_NOTF_COAL = 52 // Device supports virtqueue notification coalescing + VIRTIO_NET_F_NOTF_COAL = 53 // Device supports notifications coalescing + VIRTIO_NET_F_GUEST_USO4 = 54 // Guest can handle USOv4 in. + VIRTIO_NET_F_GUEST_USO6 = 55 // Guest can handle USOv6 in. + VIRTIO_NET_F_HOST_USO = 56 // Host can handle USO in. + VIRTIO_NET_F_HASH_REPORT = 57 // Supports hash report + VIRTIO_NET_F_GUEST_HDRLEN = 59 // Guest provides the exact hdr_len value. + VIRTIO_NET_F_RSS = 60 // Supports RSS RX steering + VIRTIO_NET_F_RSC_EXT = 61 // extended coalescing info + VIRTIO_NET_F_STANDBY = 62 // Act as standby for another device with the same MAC. + VIRTIO_NET_F_SPEED_DUPLEX = 63 // Device set linkspeed and duplex + VIRTIO_NET_F_GSO = 6 // Host handles pkts any GSO type +) + +// virtio net status +const ( + VIRTIO_NET_S_LINK_UP = 1 // Link is up + VIRTIO_NET_S_ANNOUNCE = 2 // Announcement is needed +) + +// virtio config +const ( + // Do we get callbacks when the ring is completely used, even if we've + // suppressed them? + VIRTIO_F_NOTIFY_ON_EMPTY = 24 + // Can the device handle any descriptor layout? + VIRTIO_F_ANY_LAYOUT = 27 + // v1.0 compliant + VIRTIO_F_VERSION_1 = 32 + // If clear - device has the platform DMA (e.g. IOMMU) bypass quirk feature. + // If set - use platform DMA tools to access the memory. + // Note the reverse polarity (compared to most other features), + // this is for compatibility with legacy systems. + VIRTIO_F_ACCESS_PLATFORM = 33 + // Legacy name for VIRTIO_F_ACCESS_PLATFORM (for compatibility with old userspace) + VIRTIO_F_IOMMU_PLATFORM = VIRTIO_F_ACCESS_PLATFORM + // This feature indicates support for the packed virtqueue layout. + VIRTIO_F_RING_PACKED = 34 + // Inorder feature indicates that all buffers are used by the device + // in the same order in which they have been made available. + VIRTIO_F_IN_ORDER = 35 + // This feature indicates that memory accesses by the driver and the + // device are ordered in a way described by the platform. + VIRTIO_F_ORDER_PLATFORM = 36 + // Does the device support Single Root I/O Virtualization? + VIRTIO_F_SR_IOV = 37 + // This feature indicates that the driver passes extra data (besides + // identifying the virtqueue) in its device notifications. + VIRTIO_F_NOTIFICATION_DATA = 38 + // This feature indicates that the driver uses the data provided by the device + // as a virtqueue identifier in available buffer notifications. + VIRTIO_F_NOTIF_CONFIG_DATA = 39 + // This feature indicates that the driver can reset a queue individually. + VIRTIO_F_RING_RESET = 40 +) + +// virtio device ids +const ( + VIRTIO_ID_NET = 1 // virtio net + VIRTIO_ID_BLOCK = 2 // virtio block + VIRTIO_ID_CONSOLE = 3 // virtio console + VIRTIO_ID_RNG = 4 // virtio rng + VIRTIO_ID_BALLOON = 5 // virtio balloon + VIRTIO_ID_IOMEM = 6 // virtio ioMemory + VIRTIO_ID_RPMSG = 7 // virtio remote processor messaging + VIRTIO_ID_SCSI = 8 // virtio scsi + VIRTIO_ID_9P = 9 // 9p virtio console + VIRTIO_ID_MAC80211_WLAN = 10 // virtio WLAN MAC + VIRTIO_ID_RPROC_SERIAL = 11 // virtio remoteproc serial link + VIRTIO_ID_CAIF = 12 // Virtio caif + VIRTIO_ID_MEMORY_BALLOON = 13 // virtio memory balloon + VIRTIO_ID_GPU = 16 // virtio GPU + VIRTIO_ID_CLOCK = 17 // virtio clock/timer + VIRTIO_ID_INPUT = 18 // virtio input + VIRTIO_ID_VSOCK = 19 // virtio vsock transport + VIRTIO_ID_CRYPTO = 20 // virtio crypto + VIRTIO_ID_SIGNAL_DIST = 21 // virtio signal distribution device + VIRTIO_ID_PSTORE = 22 // virtio pstore device + VIRTIO_ID_IOMMU = 23 // virtio IOMMU + VIRTIO_ID_MEM = 24 // virtio mem + VIRTIO_ID_SOUND = 25 // virtio sound + VIRTIO_ID_FS = 26 // virtio filesystem + VIRTIO_ID_PMEM = 27 // virtio pmem + VIRTIO_ID_RPMB = 28 // virtio rpmb + VIRTIO_ID_MAC80211_HWSIM = 29 // virtio mac80211-hwsim + VIRTIO_ID_VIDEO_ENCODER = 30 // virtio video encoder + VIRTIO_ID_VIDEO_DECODER = 31 // virtio video decoder + VIRTIO_ID_SCMI = 32 // virtio SCMI + VIRTIO_ID_NITRO_SEC_MOD = 33 // virtio nitro secure module + VIRTIO_ID_I2C_ADAPTER = 34 // virtio i2c adapter + VIRTIO_ID_WATCHDOG = 35 // virtio watchdog + VIRTIO_ID_CAN = 36 // virtio can + VIRTIO_ID_DMABUF = 37 // virtio dmabuf + VIRTIO_ID_PARAM_SERV = 38 // virtio parameter server + VIRTIO_ID_AUDIO_POLICY = 39 // virtio audio policy + VIRTIO_ID_BT = 40 // virtio bluetooth + VIRTIO_ID_GPIO = 41 // virtio gpio + // Virtio Transitional IDs + VIRTIO_TRANS_ID_NET = 0x1000 // transitional virtio net + VIRTIO_TRANS_ID_BLOCK = 0x1001 // transitional virtio block + VIRTIO_TRANS_ID_BALLOON = 0x1002 // transitional virtio balloon + VIRTIO_TRANS_ID_CONSOLE = 0x1003 // transitional virtio console + VIRTIO_TRANS_ID_SCSI = 0x1004 // transitional virtio SCSI + VIRTIO_TRANS_ID_RNG = 0x1005 // transitional virtio rng + VIRTIO_TRANS_ID_9P = 0x1009 // transitional virtio 9p console +) diff --git a/vendor/github.com/vishvananda/netlink/xdp_diag.go b/vendor/github.com/vishvananda/netlink/xdp_diag.go new file mode 100644 index 00000000000..e88825bf55e --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xdp_diag.go @@ -0,0 +1,34 @@ +package netlink + +import "github.com/vishvananda/netlink/nl" + +const SOCK_ANY_COOKIE = uint64(nl.TCPDIAG_NOCOOKIE)<<32 + uint64(nl.TCPDIAG_NOCOOKIE) + +// XDP diagnosis show flag constants to request particular information elements. +const ( + XDP_SHOW_INFO = 1 << iota + XDP_SHOW_RING_CFG + XDP_SHOW_UMEM + XDP_SHOW_MEMINFO + XDP_SHOW_STATS +) + +// XDP diag element constants +const ( + XDP_DIAG_NONE = iota + XDP_DIAG_INFO // when using XDP_SHOW_INFO + XDP_DIAG_UID // when using XDP_SHOW_INFO + XDP_DIAG_RX_RING // when using XDP_SHOW_RING_CFG + XDP_DIAG_TX_RING // when using XDP_SHOW_RING_CFG + XDP_DIAG_UMEM // when using XDP_SHOW_UMEM + XDP_DIAG_UMEM_FILL_RING // when using XDP_SHOW_UMEM + XDP_DIAG_UMEM_COMPLETION_RING // when using XDP_SHOW_UMEM + XDP_DIAG_MEMINFO // when using XDP_SHOW_MEMINFO + XDP_DIAG_STATS // when using XDP_SHOW_STATS +) + +// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L21 +type XDPDiagInfoResp struct { + XDPDiagMsg *XDPSocket + XDPInfo *XDPInfo +} diff --git a/vendor/github.com/vishvananda/netlink/xdp_linux.go b/vendor/github.com/vishvananda/netlink/xdp_linux.go new file mode 100644 index 00000000000..896a406deb4 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xdp_linux.go @@ -0,0 +1,46 @@ +package netlink + +import ( + "bytes" + "fmt" +) + +const ( + xdrDiagUmemLen = 8 + 8*4 + xdrDiagStatsLen = 6 * 8 +) + +func (x *XDPDiagUmem) deserialize(b []byte) error { + if len(b) < xdrDiagUmemLen { + return fmt.Errorf("XDP umem diagnosis data short read (%d); want %d", len(b), xdrDiagUmemLen) + } + + rb := bytes.NewBuffer(b) + x.Size = native.Uint64(rb.Next(8)) + x.ID = native.Uint32(rb.Next(4)) + x.NumPages = native.Uint32(rb.Next(4)) + x.ChunkSize = native.Uint32(rb.Next(4)) + x.Headroom = native.Uint32(rb.Next(4)) + x.Ifindex = native.Uint32(rb.Next(4)) + x.QueueID = native.Uint32(rb.Next(4)) + x.Flags = native.Uint32(rb.Next(4)) + x.Refs = native.Uint32(rb.Next(4)) + + return nil +} + +func (x *XDPDiagStats) deserialize(b []byte) error { + if len(b) < xdrDiagStatsLen { + return fmt.Errorf("XDP diagnosis statistics short read (%d); want %d", len(b), xdrDiagStatsLen) + } + + rb := bytes.NewBuffer(b) + x.RxDropped = native.Uint64(rb.Next(8)) + x.RxInvalid = native.Uint64(rb.Next(8)) + x.RxFull = native.Uint64(rb.Next(8)) + x.FillRingEmpty = native.Uint64(rb.Next(8)) + x.TxInvalid = native.Uint64(rb.Next(8)) + x.TxRingEmpty = native.Uint64(rb.Next(8)) + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm.go b/vendor/github.com/vishvananda/netlink/xfrm_linux.go similarity index 95% rename from vendor/github.com/vishvananda/netlink/xfrm.go rename to vendor/github.com/vishvananda/netlink/xfrm_linux.go index 02b41842e10..dd38ed8e082 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_linux.go @@ -14,7 +14,7 @@ const ( XFRM_PROTO_ESP Proto = unix.IPPROTO_ESP XFRM_PROTO_AH Proto = unix.IPPROTO_AH XFRM_PROTO_HAO Proto = unix.IPPROTO_DSTOPTS - XFRM_PROTO_COMP Proto = 0x6c // NOTE not defined on darwin + XFRM_PROTO_COMP Proto = unix.IPPROTO_COMP XFRM_PROTO_IPSEC_ANY Proto = unix.IPPROTO_RAW ) diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy.go b/vendor/github.com/vishvananda/netlink/xfrm_policy.go deleted file mode 100644 index 6219d277258..00000000000 --- a/vendor/github.com/vishvananda/netlink/xfrm_policy.go +++ /dev/null @@ -1,96 +0,0 @@ -package netlink - -import ( - "fmt" - "net" -) - -// Dir is an enum representing an ipsec template direction. -type Dir uint8 - -const ( - XFRM_DIR_IN Dir = iota - XFRM_DIR_OUT - XFRM_DIR_FWD - XFRM_SOCKET_IN - XFRM_SOCKET_OUT - XFRM_SOCKET_FWD -) - -func (d Dir) String() string { - switch d { - case XFRM_DIR_IN: - return "dir in" - case XFRM_DIR_OUT: - return "dir out" - case XFRM_DIR_FWD: - return "dir fwd" - case XFRM_SOCKET_IN: - return "socket in" - case XFRM_SOCKET_OUT: - return "socket out" - case XFRM_SOCKET_FWD: - return "socket fwd" - } - return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN) -} - -// PolicyAction is an enum representing an ipsec policy action. -type PolicyAction uint8 - -const ( - XFRM_POLICY_ALLOW PolicyAction = 0 - XFRM_POLICY_BLOCK PolicyAction = 1 -) - -func (a PolicyAction) String() string { - switch a { - case XFRM_POLICY_ALLOW: - return "allow" - case XFRM_POLICY_BLOCK: - return "block" - default: - return fmt.Sprintf("action %d", a) - } -} - -// XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec -// policy. These rules are matched with XfrmState to determine encryption -// and authentication algorithms. -type XfrmPolicyTmpl struct { - Dst net.IP - Src net.IP - Proto Proto - Mode Mode - Spi int - Reqid int -} - -func (t XfrmPolicyTmpl) String() string { - return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, Mode: %s, Spi: 0x%x, Reqid: 0x%x}", - t.Dst, t.Src, t.Proto, t.Mode, t.Spi, t.Reqid) -} - -// XfrmPolicy represents an ipsec policy. It represents the overlay network -// and has a list of XfrmPolicyTmpls representing the base addresses of -// the policy. -type XfrmPolicy struct { - Dst *net.IPNet - Src *net.IPNet - Proto Proto - DstPort int - SrcPort int - Dir Dir - Priority int - Index int - Action PolicyAction - Ifindex int - Ifid int - Mark *XfrmMark - Tmpls []XfrmPolicyTmpl -} - -func (p XfrmPolicy) String() string { - return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Action: %s, Ifindex: %d, Ifid: %d, Mark: %s, Tmpls: %s}", - p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Action, p.Ifindex, p.Ifid, p.Mark, p.Tmpls) -} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go index a4e132ef55f..d526739cebf 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go @@ -1,10 +1,104 @@ package netlink import ( + "fmt" + "net" + "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" ) +// Dir is an enum representing an ipsec template direction. +type Dir uint8 + +const ( + XFRM_DIR_IN Dir = iota + XFRM_DIR_OUT + XFRM_DIR_FWD + XFRM_SOCKET_IN + XFRM_SOCKET_OUT + XFRM_SOCKET_FWD +) + +func (d Dir) String() string { + switch d { + case XFRM_DIR_IN: + return "dir in" + case XFRM_DIR_OUT: + return "dir out" + case XFRM_DIR_FWD: + return "dir fwd" + case XFRM_SOCKET_IN: + return "socket in" + case XFRM_SOCKET_OUT: + return "socket out" + case XFRM_SOCKET_FWD: + return "socket fwd" + } + return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN) +} + +// PolicyAction is an enum representing an ipsec policy action. +type PolicyAction uint8 + +const ( + XFRM_POLICY_ALLOW PolicyAction = 0 + XFRM_POLICY_BLOCK PolicyAction = 1 +) + +func (a PolicyAction) String() string { + switch a { + case XFRM_POLICY_ALLOW: + return "allow" + case XFRM_POLICY_BLOCK: + return "block" + default: + return fmt.Sprintf("action %d", a) + } +} + +// XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec +// policy. These rules are matched with XfrmState to determine encryption +// and authentication algorithms. +type XfrmPolicyTmpl struct { + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Spi int + Reqid int + Optional int +} + +func (t XfrmPolicyTmpl) String() string { + return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, Mode: %s, Spi: 0x%x, Reqid: 0x%x}", + t.Dst, t.Src, t.Proto, t.Mode, t.Spi, t.Reqid) +} + +// XfrmPolicy represents an ipsec policy. It represents the overlay network +// and has a list of XfrmPolicyTmpls representing the base addresses of +// the policy. +type XfrmPolicy struct { + Dst *net.IPNet + Src *net.IPNet + Proto Proto + DstPort int + SrcPort int + Dir Dir + Priority int + Index int + Action PolicyAction + Ifindex int + Ifid int + Mark *XfrmMark + Tmpls []XfrmPolicyTmpl +} + +func (p XfrmPolicy) String() string { + return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Action: %s, Ifindex: %d, Ifid: %d, Mark: %s, Tmpls: %s}", + p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Action, p.Ifindex, p.Ifid, p.Mark, p.Tmpls) +} + func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) { sel.Family = uint16(nl.FAMILY_V4) if policy.Dst != nil { @@ -75,10 +169,12 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error { userTmpl := nl.DeserializeXfrmUserTmpl(tmplData[start : start+nl.SizeofXfrmUserTmpl]) userTmpl.XfrmId.Daddr.FromIP(tmpl.Dst) userTmpl.Saddr.FromIP(tmpl.Src) + userTmpl.Family = uint16(nl.GetIPFamily(tmpl.Dst)) userTmpl.XfrmId.Proto = uint8(tmpl.Proto) userTmpl.XfrmId.Spi = nl.Swap32(uint32(tmpl.Spi)) userTmpl.Mode = uint8(tmpl.Mode) userTmpl.Reqid = uint32(tmpl.Reqid) + userTmpl.Optional = uint8(tmpl.Optional) userTmpl.Aalgos = ^uint32(0) userTmpl.Ealgos = ^uint32(0) userTmpl.Calgos = ^uint32(0) @@ -92,8 +188,10 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error { req.AddData(out) } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) - req.AddData(ifId) + if policy.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) + req.AddData(ifId) + } _, err := req.Execute(unix.NETLINK_XFRM, 0) return err @@ -188,8 +286,10 @@ func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPo req.AddData(out) } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) - req.AddData(ifId) + if policy.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) + req.AddData(ifId) + } resType := nl.XFRM_MSG_NEWPOLICY if nlProto == nl.XFRM_MSG_DELPOLICY { @@ -218,8 +318,8 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) { var policy XfrmPolicy - policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD) - policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS) + policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD, uint16(family)) + policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS, uint16(family)) policy.Proto = Proto(msg.Sel.Proto) policy.DstPort = int(nl.Swap16(msg.Sel.Dport)) policy.SrcPort = int(nl.Swap16(msg.Sel.Sport)) @@ -247,6 +347,7 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) { resTmpl.Mode = Mode(tmpl.Mode) resTmpl.Spi = int(nl.Swap32(tmpl.XfrmId.Spi)) resTmpl.Reqid = int(tmpl.Reqid) + resTmpl.Optional = int(tmpl.Optional) policy.Tmpls = append(policy.Tmpls, resTmpl) } case nl.XFRMA_MARK: diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state.go b/vendor/github.com/vishvananda/netlink/xfrm_state.go deleted file mode 100644 index 483d8934a8f..00000000000 --- a/vendor/github.com/vishvananda/netlink/xfrm_state.go +++ /dev/null @@ -1,131 +0,0 @@ -package netlink - -import ( - "fmt" - "net" - "time" -) - -// XfrmStateAlgo represents the algorithm to use for the ipsec encryption. -type XfrmStateAlgo struct { - Name string - Key []byte - TruncateLen int // Auth only - ICVLen int // AEAD only -} - -func (a XfrmStateAlgo) String() string { - base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key) - if a.TruncateLen != 0 { - base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen) - } - if a.ICVLen != 0 { - base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen) - } - return fmt.Sprintf("%s}", base) -} - -// EncapType is an enum representing the optional packet encapsulation. -type EncapType uint8 - -const ( - XFRM_ENCAP_ESPINUDP_NONIKE EncapType = iota + 1 - XFRM_ENCAP_ESPINUDP -) - -func (e EncapType) String() string { - switch e { - case XFRM_ENCAP_ESPINUDP_NONIKE: - return "espinudp-non-ike" - case XFRM_ENCAP_ESPINUDP: - return "espinudp" - } - return "unknown" -} - -// XfrmStateEncap represents the encapsulation to use for the ipsec encryption. -type XfrmStateEncap struct { - Type EncapType - SrcPort int - DstPort int - OriginalAddress net.IP -} - -func (e XfrmStateEncap) String() string { - return fmt.Sprintf("{Type: %s, Srcport: %d, DstPort: %d, OriginalAddress: %v}", - e.Type, e.SrcPort, e.DstPort, e.OriginalAddress) -} - -// XfrmStateLimits represents the configured limits for the state. -type XfrmStateLimits struct { - ByteSoft uint64 - ByteHard uint64 - PacketSoft uint64 - PacketHard uint64 - TimeSoft uint64 - TimeHard uint64 - TimeUseSoft uint64 - TimeUseHard uint64 -} - -// XfrmStateStats represents the current number of bytes/packets -// processed by this State, the State's installation and first use -// time and the replay window counters. -type XfrmStateStats struct { - ReplayWindow uint32 - Replay uint32 - Failed uint32 - Bytes uint64 - Packets uint64 - AddTime uint64 - UseTime uint64 -} - -// XfrmState represents the state of an ipsec policy. It optionally -// contains an XfrmStateAlgo for encryption and one for authentication. -type XfrmState struct { - Dst net.IP - Src net.IP - Proto Proto - Mode Mode - Spi int - Reqid int - ReplayWindow int - Limits XfrmStateLimits - Statistics XfrmStateStats - Mark *XfrmMark - OutputMark int - Ifid int - Auth *XfrmStateAlgo - Crypt *XfrmStateAlgo - Aead *XfrmStateAlgo - Encap *XfrmStateEncap - ESN bool -} - -func (sa XfrmState) String() string { - return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %d, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t", - sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN) -} -func (sa XfrmState) Print(stats bool) string { - if !stats { - return sa.String() - } - at := time.Unix(int64(sa.Statistics.AddTime), 0).Format(time.UnixDate) - ut := "-" - if sa.Statistics.UseTime > 0 { - ut = time.Unix(int64(sa.Statistics.UseTime), 0).Format(time.UnixDate) - } - return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d, Bytes: %d, Packets: %d, "+ - "AddTime: %s, UseTime: %s, ReplayWindow: %d, Replay: %d, Failed: %d", - sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard), - sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard, sa.Statistics.Bytes, sa.Statistics.Packets, at, ut, - sa.Statistics.ReplayWindow, sa.Statistics.Replay, sa.Statistics.Failed) -} - -func printLimit(lmt uint64) string { - if lmt == ^uint64(0) { - return "(INF)" - } - return fmt.Sprintf("%d", lmt) -} diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go index 66c99423c47..554f2498c2c 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go @@ -2,12 +2,154 @@ package netlink import ( "fmt" + "net" + "time" "unsafe" "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" ) +// XfrmStateAlgo represents the algorithm to use for the ipsec encryption. +type XfrmStateAlgo struct { + Name string + Key []byte + TruncateLen int // Auth only + ICVLen int // AEAD only +} + +func (a XfrmStateAlgo) String() string { + base := fmt.Sprintf("{Name: %s, Key: 0x%x", a.Name, a.Key) + if a.TruncateLen != 0 { + base = fmt.Sprintf("%s, Truncate length: %d", base, a.TruncateLen) + } + if a.ICVLen != 0 { + base = fmt.Sprintf("%s, ICV length: %d", base, a.ICVLen) + } + return fmt.Sprintf("%s}", base) +} + +// EncapType is an enum representing the optional packet encapsulation. +type EncapType uint8 + +const ( + XFRM_ENCAP_ESPINUDP_NONIKE EncapType = iota + 1 + XFRM_ENCAP_ESPINUDP +) + +func (e EncapType) String() string { + switch e { + case XFRM_ENCAP_ESPINUDP_NONIKE: + return "espinudp-non-ike" + case XFRM_ENCAP_ESPINUDP: + return "espinudp" + } + return "unknown" +} + +// XfrmStateEncap represents the encapsulation to use for the ipsec encryption. +type XfrmStateEncap struct { + Type EncapType + SrcPort int + DstPort int + OriginalAddress net.IP +} + +func (e XfrmStateEncap) String() string { + return fmt.Sprintf("{Type: %s, Srcport: %d, DstPort: %d, OriginalAddress: %v}", + e.Type, e.SrcPort, e.DstPort, e.OriginalAddress) +} + +// XfrmStateLimits represents the configured limits for the state. +type XfrmStateLimits struct { + ByteSoft uint64 + ByteHard uint64 + PacketSoft uint64 + PacketHard uint64 + TimeSoft uint64 + TimeHard uint64 + TimeUseSoft uint64 + TimeUseHard uint64 +} + +// XfrmStateStats represents the current number of bytes/packets +// processed by this State, the State's installation and first use +// time and the replay window counters. +type XfrmStateStats struct { + ReplayWindow uint32 + Replay uint32 + Failed uint32 + Bytes uint64 + Packets uint64 + AddTime uint64 + UseTime uint64 +} + +// XfrmReplayState represents the sequence number states for +// "legacy" anti-replay mode. +type XfrmReplayState struct { + OSeq uint32 + Seq uint32 + BitMap uint32 +} + +func (r XfrmReplayState) String() string { + return fmt.Sprintf("{OSeq: 0x%x, Seq: 0x%x, BitMap: 0x%x}", + r.OSeq, r.Seq, r.BitMap) +} + +// XfrmState represents the state of an ipsec policy. It optionally +// contains an XfrmStateAlgo for encryption and one for authentication. +type XfrmState struct { + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Spi int + Reqid int + ReplayWindow int + Limits XfrmStateLimits + Statistics XfrmStateStats + Mark *XfrmMark + OutputMark *XfrmMark + Ifid int + Auth *XfrmStateAlgo + Crypt *XfrmStateAlgo + Aead *XfrmStateAlgo + Encap *XfrmStateEncap + ESN bool + DontEncapDSCP bool + OSeqMayWrap bool + Replay *XfrmReplayState + Selector *XfrmPolicy +} + +func (sa XfrmState) String() string { + return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %v, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t, DontEncapDSCP: %t, OSeqMayWrap: %t, Replay: %v", + sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN, sa.DontEncapDSCP, sa.OSeqMayWrap, sa.Replay) +} +func (sa XfrmState) Print(stats bool) string { + if !stats { + return sa.String() + } + at := time.Unix(int64(sa.Statistics.AddTime), 0).Format(time.UnixDate) + ut := "-" + if sa.Statistics.UseTime > 0 { + ut = time.Unix(int64(sa.Statistics.UseTime), 0).Format(time.UnixDate) + } + return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d, Bytes: %d, Packets: %d, "+ + "AddTime: %s, UseTime: %s, ReplayWindow: %d, Replay: %d, Failed: %d", + sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard), + sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard, sa.Statistics.Bytes, sa.Statistics.Packets, at, ut, + sa.Statistics.ReplayWindow, sa.Statistics.Replay, sa.Statistics.Failed) +} + +func printLimit(lmt uint64) string { + if lmt == ^uint64(0) { + return "(INF)" + } + return fmt.Sprintf("%d", lmt) +} func writeStateAlgo(a *XfrmStateAlgo) []byte { algo := nl.XfrmAlgo{ AlgKeyLen: uint32(len(a.Key) * 8), @@ -77,6 +219,14 @@ func writeReplayEsn(replayWindow int) []byte { return replayEsn.Serialize() } +func writeReplay(r *XfrmReplayState) []byte { + return (&nl.XfrmReplayState{ + OSeq: r.OSeq, + Seq: r.Seq, + BitMap: r.BitMap, + }).Serialize() +} + // XfrmStateAdd will add an xfrm state to the system. // Equivalent to: `ip xfrm state add $state` func XfrmStateAdd(state *XfrmState) error { @@ -111,7 +261,7 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error { // A state with spi 0 can't be deleted so don't allow it to be set if state.Spi == 0 { - return fmt.Errorf("Spi must be set when adding xfrm state.") + return fmt.Errorf("Spi must be set when adding xfrm state") } req := h.newNetlinkRequest(nlProto, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) @@ -158,13 +308,34 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error { out := nl.NewRtAttr(nl.XFRMA_REPLAY_ESN_VAL, writeReplayEsn(state.ReplayWindow)) req.AddData(out) } - if state.OutputMark != 0 { - out := nl.NewRtAttr(nl.XFRMA_OUTPUT_MARK, nl.Uint32Attr(uint32(state.OutputMark))) + if state.OutputMark != nil { + out := nl.NewRtAttr(nl.XFRMA_SET_MARK, nl.Uint32Attr(state.OutputMark.Value)) + req.AddData(out) + if state.OutputMark.Mask != 0 { + out = nl.NewRtAttr(nl.XFRMA_SET_MARK_MASK, nl.Uint32Attr(state.OutputMark.Mask)) + req.AddData(out) + } + } + if state.OSeqMayWrap || state.DontEncapDSCP { + var flags uint32 + if state.DontEncapDSCP { + flags |= nl.XFRM_SA_XFLAG_DONT_ENCAP_DSCP + } + if state.OSeqMayWrap { + flags |= nl.XFRM_SA_XFLAG_OSEQ_MAY_WRAP + } + out := nl.NewRtAttr(nl.XFRMA_SA_EXTRA_FLAGS, nl.Uint32Attr(flags)) + req.AddData(out) + } + if state.Replay != nil { + out := nl.NewRtAttr(nl.XFRMA_REPLAY_VAL, writeReplay(state.Replay)) req.AddData(out) } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) - req.AddData(ifId) + if state.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) + req.AddData(ifId) + } _, err := req.Execute(unix.NETLINK_XFRM, 0) return err @@ -180,7 +351,6 @@ func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) { msg.Min = 0x100 msg.Max = 0xffffffff req.AddData(msg) - if state.Mark != nil { out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark)) req.AddData(out) @@ -277,8 +447,10 @@ func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState req.AddData(out) } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) - req.AddData(ifId) + if state.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) + req.AddData(ifId) + } resType := nl.XFRM_MSG_NEWSA if nlProto == nl.XFRM_MSG_DELSA { @@ -306,7 +478,6 @@ var familyError = fmt.Errorf("family error") func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState { var state XfrmState - state.Dst = msg.Id.Daddr.ToIP() state.Src = msg.Saddr.ToIP() state.Proto = Proto(msg.Id.Proto) @@ -316,20 +487,25 @@ func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState { state.ReplayWindow = int(msg.ReplayWindow) lftToLimits(&msg.Lft, &state.Limits) curToStats(&msg.Curlft, &msg.Stats, &state.Statistics) + state.Selector = &XfrmPolicy{ + Dst: msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD, msg.Sel.Family), + Src: msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS, msg.Sel.Family), + Proto: Proto(msg.Sel.Proto), + DstPort: int(nl.Swap16(msg.Sel.Dport)), + SrcPort: int(nl.Swap16(msg.Sel.Sport)), + Ifindex: int(msg.Sel.Ifindex), + } return &state } func parseXfrmState(m []byte, family int) (*XfrmState, error) { msg := nl.DeserializeXfrmUsersaInfo(m) - // This is mainly for the state dump if family != FAMILY_ALL && family != int(msg.Family) { return nil, familyError } - state := xfrmStateFromXfrmUsersaInfo(msg) - attrs, err := nl.ParseRouteAttr(m[nl.SizeofXfrmUsersaInfo:]) if err != nil { return nil, err @@ -377,10 +553,37 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) { state.Mark = new(XfrmMark) state.Mark.Value = mark.Value state.Mark.Mask = mark.Mask - case nl.XFRMA_OUTPUT_MARK: - state.OutputMark = int(native.Uint32(attr.Value)) + case nl.XFRMA_SA_EXTRA_FLAGS: + flags := native.Uint32(attr.Value) + if (flags & nl.XFRM_SA_XFLAG_DONT_ENCAP_DSCP) != 0 { + state.DontEncapDSCP = true + } + if (flags & nl.XFRM_SA_XFLAG_OSEQ_MAY_WRAP) != 0 { + state.OSeqMayWrap = true + } + case nl.XFRMA_SET_MARK: + if state.OutputMark == nil { + state.OutputMark = new(XfrmMark) + } + state.OutputMark.Value = native.Uint32(attr.Value) + case nl.XFRMA_SET_MARK_MASK: + if state.OutputMark == nil { + state.OutputMark = new(XfrmMark) + } + state.OutputMark.Mask = native.Uint32(attr.Value) + if state.OutputMark.Mask == 0xffffffff { + state.OutputMark.Mask = 0 + } case nl.XFRMA_IF_ID: state.Ifid = int(native.Uint32(attr.Value)) + case nl.XFRMA_REPLAY_VAL: + if state.Replay == nil { + state.Replay = new(XfrmReplayState) + } + replay := nl.DeserializeXfrmReplayState(attr.Value[:]) + state.Replay.OSeq = replay.OSeq + state.Replay.Seq = replay.Seq + state.Replay.BitMap = replay.BitMap } } @@ -457,6 +660,9 @@ func xfrmUsersaInfoFromXfrmState(state *XfrmState) *nl.XfrmUsersaInfo { msg.Id.Spi = nl.Swap32(uint32(state.Spi)) msg.Reqid = uint32(state.Reqid) msg.ReplayWindow = uint8(state.ReplayWindow) - + msg.Sel = nl.XfrmSelector{} + if state.Selector != nil { + selFromPolicy(&msg.Sel, state.Selector) + } return msg } diff --git a/vendor/github.com/vishvananda/netlink/xfrm_unspecified.go b/vendor/github.com/vishvananda/netlink/xfrm_unspecified.go new file mode 100644 index 00000000000..12fdd26d79a --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/xfrm_unspecified.go @@ -0,0 +1,7 @@ +//go:build !linux +// +build !linux + +package netlink + +type XfrmPolicy struct{} +type XfrmState struct{} diff --git a/vendor/github.com/vishvananda/netns/.golangci.yml b/vendor/github.com/vishvananda/netns/.golangci.yml new file mode 100644 index 00000000000..600bef78e2b --- /dev/null +++ b/vendor/github.com/vishvananda/netns/.golangci.yml @@ -0,0 +1,2 @@ +run: + timeout: 5m diff --git a/vendor/github.com/vishvananda/netns/README.md b/vendor/github.com/vishvananda/netns/README.md index 6b45cfb892e..bdfedbe81fa 100644 --- a/vendor/github.com/vishvananda/netns/README.md +++ b/vendor/github.com/vishvananda/netns/README.md @@ -23,6 +23,7 @@ import ( "fmt" "net" "runtime" + "github.com/vishvananda/netns" ) diff --git a/vendor/github.com/vishvananda/netns/doc.go b/vendor/github.com/vishvananda/netns/doc.go new file mode 100644 index 00000000000..cd4093a4d7e --- /dev/null +++ b/vendor/github.com/vishvananda/netns/doc.go @@ -0,0 +1,9 @@ +// Package netns allows ultra-simple network namespace handling. NsHandles +// can be retrieved and set. Note that the current namespace is thread +// local so actions that set and reset namespaces should use LockOSThread +// to make sure the namespace doesn't change due to a goroutine switch. +// It is best to close NsHandles when you are done with them. This can be +// accomplished via a `defer ns.Close()` on the handle. Changing namespaces +// requires elevated privileges, so in most cases this code needs to be run +// as root. +package netns diff --git a/vendor/github.com/vishvananda/netns/netns_linux.go b/vendor/github.com/vishvananda/netns/netns_linux.go index b1e3b07c09c..2ed7c7e2fa4 100644 --- a/vendor/github.com/vishvananda/netns/netns_linux.go +++ b/vendor/github.com/vishvananda/netns/netns_linux.go @@ -1,77 +1,105 @@ -// +build linux - package netns import ( "fmt" - "io/ioutil" "os" + "path" "path/filepath" - "runtime" "strconv" "strings" - "syscall" + + "golang.org/x/sys/unix" ) -// SYS_SETNS syscall allows changing the namespace of the current process. -var SYS_SETNS = map[string]uintptr{ - "386": 346, - "amd64": 308, - "arm64": 268, - "arm": 375, - "mips": 4344, - "mipsle": 4344, - "mips64le": 4344, - "ppc64": 350, - "ppc64le": 350, - "riscv64": 268, - "s390x": 339, -}[runtime.GOARCH] - -// Deprecated: use syscall pkg instead (go >= 1.5 needed). +// Deprecated: use golang.org/x/sys/unix pkg instead. const ( - CLONE_NEWUTS = 0x04000000 /* New utsname group? */ - CLONE_NEWIPC = 0x08000000 /* New ipcs */ - CLONE_NEWUSER = 0x10000000 /* New user namespace */ - CLONE_NEWPID = 0x20000000 /* New pid namespace */ - CLONE_NEWNET = 0x40000000 /* New network namespace */ - CLONE_IO = 0x80000000 /* Get io context */ + CLONE_NEWUTS = unix.CLONE_NEWUTS /* New utsname group? */ + CLONE_NEWIPC = unix.CLONE_NEWIPC /* New ipcs */ + CLONE_NEWUSER = unix.CLONE_NEWUSER /* New user namespace */ + CLONE_NEWPID = unix.CLONE_NEWPID /* New pid namespace */ + CLONE_NEWNET = unix.CLONE_NEWNET /* New network namespace */ + CLONE_IO = unix.CLONE_IO /* Get io context */ ) -// Setns sets namespace using syscall. Note that this should be a method -// in syscall but it has not been added. +const bindMountPath = "/run/netns" /* Bind mount path for named netns */ + +// Setns sets namespace using golang.org/x/sys/unix.Setns. +// +// Deprecated: Use golang.org/x/sys/unix.Setns instead. func Setns(ns NsHandle, nstype int) (err error) { - _, _, e1 := syscall.Syscall(SYS_SETNS, uintptr(ns), uintptr(nstype), 0) - if e1 != 0 { - err = e1 - } - return + return unix.Setns(int(ns), nstype) } // Set sets the current network namespace to the namespace represented // by NsHandle. func Set(ns NsHandle) (err error) { - return Setns(ns, CLONE_NEWNET) + return unix.Setns(int(ns), unix.CLONE_NEWNET) } // New creates a new network namespace, sets it as current and returns // a handle to it. func New() (ns NsHandle, err error) { - if err := syscall.Unshare(CLONE_NEWNET); err != nil { + if err := unix.Unshare(unix.CLONE_NEWNET); err != nil { return -1, err } return Get() } +// NewNamed creates a new named network namespace, sets it as current, +// and returns a handle to it +func NewNamed(name string) (NsHandle, error) { + if _, err := os.Stat(bindMountPath); os.IsNotExist(err) { + err = os.MkdirAll(bindMountPath, 0755) + if err != nil { + return None(), err + } + } + + newNs, err := New() + if err != nil { + return None(), err + } + + namedPath := path.Join(bindMountPath, name) + + f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0444) + if err != nil { + newNs.Close() + return None(), err + } + f.Close() + + nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid()) + err = unix.Mount(nsPath, namedPath, "bind", unix.MS_BIND, "") + if err != nil { + newNs.Close() + return None(), err + } + + return newNs, nil +} + +// DeleteNamed deletes a named network namespace +func DeleteNamed(name string) error { + namedPath := path.Join(bindMountPath, name) + + err := unix.Unmount(namedPath, unix.MNT_DETACH) + if err != nil { + return err + } + + return os.Remove(namedPath) +} + // Get gets a handle to the current threads network namespace. func Get() (NsHandle, error) { - return GetFromThread(os.Getpid(), syscall.Gettid()) + return GetFromThread(os.Getpid(), unix.Gettid()) } // GetFromPath gets a handle to a network namespace // identified by the path func GetFromPath(path string) (NsHandle, error) { - fd, err := syscall.Open(path, syscall.O_RDONLY, 0) + fd, err := unix.Open(path, unix.O_RDONLY|unix.O_CLOEXEC, 0) if err != nil { return -1, err } @@ -81,7 +109,7 @@ func GetFromPath(path string) (NsHandle, error) { // GetFromName gets a handle to a named network namespace such as one // created by `ip netns add`. func GetFromName(name string) (NsHandle, error) { - return GetFromPath(fmt.Sprintf("/var/run/netns/%s", name)) + return GetFromPath(filepath.Join(bindMountPath, name)) } // GetFromPid gets a handle to the network namespace of a given pid. @@ -106,33 +134,38 @@ func GetFromDocker(id string) (NsHandle, error) { } // borrowed from docker/utils/utils.go -func findCgroupMountpoint(cgroupType string) (string, error) { - output, err := ioutil.ReadFile("/proc/mounts") +func findCgroupMountpoint(cgroupType string) (int, string, error) { + output, err := os.ReadFile("/proc/mounts") if err != nil { - return "", err + return -1, "", err } // /proc/mounts has 6 fields per line, one mount per line, e.g. // cgroup /sys/fs/cgroup/devices cgroup rw,relatime,devices 0 0 for _, line := range strings.Split(string(output), "\n") { parts := strings.Split(line, " ") - if len(parts) == 6 && parts[2] == "cgroup" { - for _, opt := range strings.Split(parts[3], ",") { - if opt == cgroupType { - return parts[1], nil + if len(parts) == 6 { + switch parts[2] { + case "cgroup2": + return 2, parts[1], nil + case "cgroup": + for _, opt := range strings.Split(parts[3], ",") { + if opt == cgroupType { + return 1, parts[1], nil + } } } } } - return "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType) + return -1, "", fmt.Errorf("cgroup mountpoint not found for %s", cgroupType) } // Returns the relative path to the cgroup docker is running in. // borrowed from docker/utils/utils.go // modified to get the docker pid instead of using /proc/self -func getThisCgroup(cgroupType string) (string, error) { - dockerpid, err := ioutil.ReadFile("/var/run/docker.pid") +func getDockerCgroup(cgroupVer int, cgroupType string) (string, error) { + dockerpid, err := os.ReadFile("/var/run/docker.pid") if err != nil { return "", err } @@ -144,14 +177,15 @@ func getThisCgroup(cgroupType string) (string, error) { if err != nil { return "", err } - output, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + output, err := os.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) if err != nil { return "", err } for _, line := range strings.Split(string(output), "\n") { parts := strings.Split(line, ":") // any type used by docker should work - if parts[1] == cgroupType { + if (cgroupVer == 1 && parts[1] == cgroupType) || + (cgroupVer == 2 && parts[1] == "") { return parts[2], nil } } @@ -163,40 +197,56 @@ func getThisCgroup(cgroupType string) (string, error) { // modified to only return the first pid // modified to glob with id // modified to search for newer docker containers +// modified to look for cgroups v2 func getPidForContainer(id string) (int, error) { pid := 0 // memory is chosen randomly, any cgroup used by docker works cgroupType := "memory" - cgroupRoot, err := findCgroupMountpoint(cgroupType) + cgroupVer, cgroupRoot, err := findCgroupMountpoint(cgroupType) if err != nil { return pid, err } - cgroupThis, err := getThisCgroup(cgroupType) + cgroupDocker, err := getDockerCgroup(cgroupVer, cgroupType) if err != nil { return pid, err } id += "*" + var pidFile string + if cgroupVer == 1 { + pidFile = "tasks" + } else if cgroupVer == 2 { + pidFile = "cgroup.procs" + } else { + return -1, fmt.Errorf("Invalid cgroup version '%d'", cgroupVer) + } + attempts := []string{ - filepath.Join(cgroupRoot, cgroupThis, id, "tasks"), + filepath.Join(cgroupRoot, cgroupDocker, id, pidFile), // With more recent lxc versions use, cgroup will be in lxc/ - filepath.Join(cgroupRoot, cgroupThis, "lxc", id, "tasks"), + filepath.Join(cgroupRoot, cgroupDocker, "lxc", id, pidFile), // With more recent docker, cgroup will be in docker/ - filepath.Join(cgroupRoot, cgroupThis, "docker", id, "tasks"), + filepath.Join(cgroupRoot, cgroupDocker, "docker", id, pidFile), // Even more recent docker versions under systemd use docker-.scope/ - filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", "tasks"), + filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", pidFile), // Even more recent docker versions under cgroup/systemd/docker// - filepath.Join(cgroupRoot, "..", "systemd", "docker", id, "tasks"), - // Kubernetes with docker and CNI is even more different - filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, "tasks"), - // Another flavor of containers location in recent kubernetes 1.11+ - filepath.Join(cgroupRoot, cgroupThis, "kubepods.slice", "kubepods-besteffort.slice", "*", "docker-"+id+".scope", "tasks"), - // When runs inside of a container with recent kubernetes 1.11+ - filepath.Join(cgroupRoot, "kubepods.slice", "kubepods-besteffort.slice", "*", "docker-"+id+".scope", "tasks"), + filepath.Join(cgroupRoot, "..", "systemd", "docker", id, pidFile), + // Kubernetes with docker and CNI is even more different. Works for BestEffort and Burstable QoS + filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, pidFile), + // Same as above but for Guaranteed QoS + filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "pod*", id, pidFile), + // Another flavor of containers location in recent kubernetes 1.11+. Works for BestEffort and Burstable QoS + filepath.Join(cgroupRoot, cgroupDocker, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", pidFile), + // Same as above but for Guaranteed QoS + filepath.Join(cgroupRoot, cgroupDocker, "kubepods.slice", "*", "docker-"+id+".scope", pidFile), + // When runs inside of a container with recent kubernetes 1.11+. Works for BestEffort and Burstable QoS + filepath.Join(cgroupRoot, "kubepods.slice", "*.slice", "*", "docker-"+id+".scope", pidFile), + // Same as above but for Guaranteed QoS + filepath.Join(cgroupRoot, "kubepods.slice", "*", "docker-"+id+".scope", pidFile), } var filename string @@ -214,7 +264,7 @@ func getPidForContainer(id string) (int, error) { return pid, fmt.Errorf("Unable to find container: %v", id[:len(id)-1]) } - output, err := ioutil.ReadFile(filename) + output, err := os.ReadFile(filename) if err != nil { return pid, err } diff --git a/vendor/github.com/vishvananda/netns/netns_unspecified.go b/vendor/github.com/vishvananda/netns/netns_others.go similarity index 63% rename from vendor/github.com/vishvananda/netns/netns_unspecified.go rename to vendor/github.com/vishvananda/netns/netns_others.go index d06af62b68a..0489837741b 100644 --- a/vendor/github.com/vishvananda/netns/netns_unspecified.go +++ b/vendor/github.com/vishvananda/netns/netns_others.go @@ -1,3 +1,4 @@ +//go:build !linux // +build !linux package netns @@ -10,6 +11,14 @@ var ( ErrNotImplemented = errors.New("not implemented") ) +// Setns sets namespace using golang.org/x/sys/unix.Setns on Linux. It +// is not implemented on other platforms. +// +// Deprecated: Use golang.org/x/sys/unix.Setns instead. +func Setns(ns NsHandle, nstype int) (err error) { + return ErrNotImplemented +} + func Set(ns NsHandle) (err error) { return ErrNotImplemented } @@ -18,6 +27,14 @@ func New() (ns NsHandle, err error) { return -1, ErrNotImplemented } +func NewNamed(name string) (NsHandle, error) { + return -1, ErrNotImplemented +} + +func DeleteNamed(name string) error { + return ErrNotImplemented +} + func Get() (NsHandle, error) { return -1, ErrNotImplemented } diff --git a/vendor/github.com/vishvananda/netns/netns.go b/vendor/github.com/vishvananda/netns/nshandle_linux.go similarity index 60% rename from vendor/github.com/vishvananda/netns/netns.go rename to vendor/github.com/vishvananda/netns/nshandle_linux.go index dd2f21570a4..1baffb66acb 100644 --- a/vendor/github.com/vishvananda/netns/netns.go +++ b/vendor/github.com/vishvananda/netns/nshandle_linux.go @@ -1,16 +1,9 @@ -// Package netns allows ultra-simple network namespace handling. NsHandles -// can be retrieved and set. Note that the current namespace is thread -// local so actions that set and reset namespaces should use LockOSThread -// to make sure the namespace doesn't change due to a goroutine switch. -// It is best to close NsHandles when you are done with them. This can be -// accomplished via a `defer ns.Close()` on the handle. Changing namespaces -// requires elevated privileges, so in most cases this code needs to be run -// as root. package netns import ( "fmt" - "syscall" + + "golang.org/x/sys/unix" ) // NsHandle is a handle to a network namespace. It can be cast directly @@ -24,11 +17,11 @@ func (ns NsHandle) Equal(other NsHandle) bool { if ns == other { return true } - var s1, s2 syscall.Stat_t - if err := syscall.Fstat(int(ns), &s1); err != nil { + var s1, s2 unix.Stat_t + if err := unix.Fstat(int(ns), &s1); err != nil { return false } - if err := syscall.Fstat(int(other), &s2); err != nil { + if err := unix.Fstat(int(other), &s2); err != nil { return false } return (s1.Dev == s2.Dev) && (s1.Ino == s2.Ino) @@ -36,11 +29,11 @@ func (ns NsHandle) Equal(other NsHandle) bool { // String shows the file descriptor number and its dev and inode. func (ns NsHandle) String() string { - var s syscall.Stat_t if ns == -1 { - return "NS(None)" + return "NS(none)" } - if err := syscall.Fstat(int(ns), &s); err != nil { + var s unix.Stat_t + if err := unix.Fstat(int(ns), &s); err != nil { return fmt.Sprintf("NS(%d: unknown)", ns) } return fmt.Sprintf("NS(%d: %d, %d)", ns, s.Dev, s.Ino) @@ -49,11 +42,11 @@ func (ns NsHandle) String() string { // UniqueId returns a string which uniquely identifies the namespace // associated with the network handle. func (ns NsHandle) UniqueId() string { - var s syscall.Stat_t if ns == -1 { return "NS(none)" } - if err := syscall.Fstat(int(ns), &s); err != nil { + var s unix.Stat_t + if err := unix.Fstat(int(ns), &s); err != nil { return "NS(unknown)" } return fmt.Sprintf("NS(%d:%d)", s.Dev, s.Ino) @@ -67,10 +60,10 @@ func (ns NsHandle) IsOpen() bool { // Close closes the NsHandle and resets its file descriptor to -1. // It is not safe to use an NsHandle after Close() is called. func (ns *NsHandle) Close() error { - if err := syscall.Close(int(*ns)); err != nil { + if err := unix.Close(int(*ns)); err != nil { return err } - (*ns) = -1 + *ns = -1 return nil } diff --git a/vendor/github.com/vishvananda/netns/nshandle_others.go b/vendor/github.com/vishvananda/netns/nshandle_others.go new file mode 100644 index 00000000000..af727bc091c --- /dev/null +++ b/vendor/github.com/vishvananda/netns/nshandle_others.go @@ -0,0 +1,45 @@ +//go:build !linux +// +build !linux + +package netns + +// NsHandle is a handle to a network namespace. It can only be used on Linux, +// but provides stub methods on other platforms. +type NsHandle int + +// Equal determines if two network handles refer to the same network +// namespace. It is only implemented on Linux. +func (ns NsHandle) Equal(_ NsHandle) bool { + return false +} + +// String shows the file descriptor number and its dev and inode. +// It is only implemented on Linux, and returns "NS(none)" on other +// platforms. +func (ns NsHandle) String() string { + return "NS(none)" +} + +// UniqueId returns a string which uniquely identifies the namespace +// associated with the network handle. It is only implemented on Linux, +// and returns "NS(none)" on other platforms. +func (ns NsHandle) UniqueId() string { + return "NS(none)" +} + +// IsOpen returns true if Close() has not been called. It is only implemented +// on Linux and always returns false on other platforms. +func (ns NsHandle) IsOpen() bool { + return false +} + +// Close closes the NsHandle and resets its file descriptor to -1. +// It is only implemented on Linux. +func (ns *NsHandle) Close() error { + return nil +} + +// None gets an empty (closed) NsHandle. +func None() NsHandle { + return NsHandle(-1) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 2fde1af55ff..6ef7d552e37 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -71,12 +71,12 @@ github.com/syndtr/gocapability/capability # github.com/urfave/cli v1.22.16 ## explicit; go 1.11 github.com/urfave/cli -# github.com/vishvananda/netlink v1.1.0 +# github.com/vishvananda/netlink v1.3.0 ## explicit; go 1.12 github.com/vishvananda/netlink github.com/vishvananda/netlink/nl -# github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df -## explicit; go 1.12 +# github.com/vishvananda/netns v0.0.4 +## explicit; go 1.17 github.com/vishvananda/netns # golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 ## explicit; go 1.18