一次虚拟机网卡丢包问题分析过程

一次虚拟机网卡丢包问题分析过程

Posted by lwk on July 28, 2022

业务反馈有几台机器有超时问题,经过登录机器发现对应网卡有丢包,难道这个丢包和业务超时有关系? 从ifconfig命令看到网卡丢包大概1秒钟一次,1秒钟一次的话,这个丢包持续了大概8268621 / 3600 / 24 = 95.68946759259259天。看了下机器的创建时间,至今已经96天了,那也就是说机器创建出来的时候就出现了丢包,并且 维持在1秒钟一个包的频率。 image

image 我们用systemtap方法进行分析

#! /usr/bin/env stap

probe kernel.function("__netif_receive_skb_core").label("drop") {
    printf("0x%04X\n", ntohs($skb->protocol))
}

image

image

通过tcpdump抓下对应包发现,和STP有关系。关于stp可以参考https://my.oschina.net/u/4302478/blog/4694985 在虚拟机上抓包

image

  • 解决办法 针对不同内核解决的方法也不同,主要区别是3.内核的stp还是内核模块,而4.版本内核已经不再将stp设置为内核模块 1)针对内核版本是3.*机器对应解决方法是:modprobe stp 执行完modprobe stp之后,发现PVM的eth0网卡dropped已经不再增加。 image

2)第二种方法就是升级内核到4.*版本以上

image

pvm的eth0网卡丢包实际对业务无影响,但可能会对正常排查问题产生干扰。因此,最好是按照以上两种解决方法去解决丢包的问题。 PS:systemtap要依赖于kernel-debuginfo,所以需要在机器上装debuginfo-common和debuginfo才能使用。

  • 后续 虽然找到了虚拟机网卡丢包的原因,也有了解决措施,但这个和业务超时并无直接关联,网卡丢包一直都有,但业务超时是刚出现的,并且后来自动恢复了,所以针对业务超时问题还没有查出根因,需要跟进一步的分析业务超时的情况。

在网上找了一个更全面的systemtap脚本,如下

#!/usr/bin/stap --all-modules
%{
#include <linux/kernel.h>
#include <linux/net.h>
#include <linux/textsearch.h>
#include <net/checksum.h>
#include <linux/dma-mapping.h>
#include <linux/netdev_features.h>
#include <linux/skbuff.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/udp.h>
#include <uapi/linux/tcp.h>
%}
############################################################
# trace_net_drop.stp
# An example script to mimic the behavior of the dropwatch utility
# Need install kernel-debuginfo and kernel-debuginfo-common before running this script
# Default reports every 5 seconds with timestamp
# Usage example: //-g for guru mode
#stap -g -v --all-modules  trace_net_drop.stp tcp daddr=10.0.0.14 dport=22 //dump all stack when skb dest addr/port 10.0.0.14/22
#stap -g -v --all-modules  trace_net_drop.stp tcp saddr=1.1.1.1 sport=1000 daddr=2.2.2.2 dport=22 match=nf_hook_slow //just dump stack when kfree_skb is called by function "nf_hook_slow"
#stap -g -v --all-modules  trace_net_drop.stp tcp daddr=10.0.0.14 dport=22 filter=tcp_rcv_state_process //dump any backtrace except function kfree_skb is called by "tcp_rcv_state_process"
#stap -g -v --all-modules  -B CONFIG_MODVERSIONS=y trace_net_drop.stp tcp daddr=10.0.0.14 dport=22 filter=tcp_rcv_state_process //add '-B CONFIG_MODVERSIONS=y' if kernel config enable Module versioning support
############################################################
function get_skb_saddr:string(skb:long)
%{
     int ret=-1;
     unsigned int src_port = 0;
     struct udphdr *udp_header;
     struct tcphdr *tcp_header;
     struct sk_buff *skb= (struct sk_buff *)STAP_ARG_skb;
     struct iphdr *ip_header;
     unsigned int src_ip=0;
     if(!skb)
     {
         goto EXIT_F;
     }
     ip_header = (struct iphdr *)skb_network_header(skb);
     if(!ip_header)
     {
       goto EXIT_F;
     }
     src_ip = (unsigned int)ip_header->saddr;
     // printk(KERN_DEBUG "IP addres = %pI4  DEST = %pI4\n", &src_ip, &dest_ip);
EXIT_F:
     snprintf(STAP_RETVALUE, MAXSTRINGLEN, "%d.%d.%d.%d",(unsigned int)((unsigned char *)&src_ip)[0],
              (unsigned int)((unsigned char *)&src_ip)[1],(unsigned int)((unsigned char *)&src_ip)[2],(unsigned int)((unsigned char *)&src_ip)[3]);
%}



function get_skb_daddr:string(skb:long)
%{
     int ret=-1;
     struct udphdr *udp_header;
     struct tcphdr *tcp_header;
     struct sk_buff *skb= (struct sk_buff *)STAP_ARG_skb;
     struct iphdr *ip_header;
     unsigned int dst_ip=0;
     if(!skb)
     {
         goto EXIT_F;
     }
     ip_header = (struct iphdr *)skb_network_header(skb);
     if(!ip_header)
     {
       goto EXIT_F;
     }
     dst_ip = (unsigned int)ip_header->daddr;
EXIT_F:
     snprintf(STAP_RETVALUE, MAXSTRINGLEN, "%d.%d.%d.%d",(unsigned int)((unsigned char *)&dst_ip)[0],
            (unsigned int)((unsigned char *)&dst_ip)[1],(unsigned int)((unsigned char *)&dst_ip)[2],(unsigned int)((unsigned char *)&dst_ip)[3]);
%}

function get_skb_sport:string(skb:long)
%{
     int ret=-1;
     unsigned int src_port = 0;
     struct udphdr *udp_header;
     struct tcphdr *tcp_header;
     struct sk_buff *skb= (struct sk_buff *)STAP_ARG_skb;
     struct iphdr *ip_header;
     if(!skb)
     {
         goto EXIT_F;
     }
     ip_header = (struct iphdr *)skb_network_header(skb);
     if(!ip_header)
     {
       goto EXIT_F;
     }

     if (ip_header->protocol==17) {
            udp_header = (struct udphdr *)skb_transport_header(skb);
            src_port = (unsigned int)ntohs(udp_header->source);
        } else if (ip_header->protocol == 6) {
            tcp_header = (struct tcphdr *)skb_transport_header(skb);
            src_port = (unsigned int)ntohs(tcp_header->source);
        }
//     printk("src_port=%d\r\n",src_port);
EXIT_F:
     snprintf(STAP_RETVALUE, MAXSTRINGLEN, "%d",src_port);
%}


function get_skb_dport:string(skb:long)
%{
     int ret=-1;
     unsigned int dst_port = 0;
     struct udphdr *udp_header;
     struct tcphdr *tcp_header;
     struct sk_buff *skb= (struct sk_buff *)STAP_ARG_skb;
     struct iphdr *ip_header;
     if(!skb)
     {
         goto EXIT_F;
     }
     ip_header = (struct iphdr *)skb_network_header(skb);
     if(!ip_header)
     {
       goto EXIT_F;
     }

     if (ip_header->protocol==17) {
            udp_header = (struct udphdr *)skb_transport_header(skb);
            dst_port = (unsigned int)ntohs(udp_header->dest);
        } else if (ip_header->protocol == 6) {
            tcp_header = (struct tcphdr *)skb_transport_header(skb);
            dst_port = (unsigned int)ntohs(tcp_header->dest);
        }
EXIT_F:
     snprintf(STAP_RETVALUE, MAXSTRINGLEN, "%d",dst_port);
%}

function get_skb_ipproto:string(skb:long)
%{
     char *ipproto = "NONE";
     struct sk_buff *skb= (struct sk_buff *)STAP_ARG_skb;
     struct iphdr *ip_header;
     if(!skb)
     {
         goto EXIT_F;
     }
     ip_header = (struct iphdr *)skb_network_header(skb);
     if(!ip_header)
     {
       goto EXIT_F;
     }

     if (ip_header->protocol == 6) {
            ipproto="TCP";
        }
     else if (ip_header->protocol == 17) {
            ipproto="UDP";
        }
     else if (ip_header->protocol == 1) {
            ipproto="ICMP";//IPPROTO_ICMP
        }

EXIT_F:
     snprintf(STAP_RETVALUE, MAXSTRINGLEN, "%s",ipproto);
%}

global addr = "all"
global port = "all"
global saddr = "all"
global sport = "all"
global daddr = "all"
global dport = "all"
global match = "all"
global filter = "none"
global WatchIpproto = "ALL"
global kfree_skb_stack
global kfree_skb_location
global interval=1 # default interval between output
global timeout=0
global BackTrace = 0
/*function get_param_val:string (mystr:string) %{
     char *ptr;
     int  ch = '=';
     char *strargs = STAP_ARG_mystr;
     ptr=strchr(strargs , ch);
     snprintf(STAP_RETVALUE, MAXSTRINGLEN, "%s",ptr + 1);
%}*/


function usage (msg:string)
{
   printf("%s:\n\n",msg);
   printf("\tall|tcp|udp|icmp: trace proto\n")
   printf("\taddr=ip address:ip address\n")
   printf("\tsaddr=ip source address:ip source address\n")
   printf("\tdaddr=ip dest address:ip destination address\n")
   printf("\tport=port: ip port\n")
   printf("\tsport=source port:source port\n")
   printf("\tdport=dest port:dest port\n")
   printf("\tmatch=<all|kernel function name>:match a specific function or any function\n")
   printf("\tfilter=<all|kernel function name>:filter a specific function or any function\n")
   printf("\tbt: print call trace\n")
   printf("\tinterval=second:Dump trace every 'interval' second\n")
   printf("\texample:\n")
   printf("\ttrace_net_drop.stp tcp saddr=1.1.1.1 sport=5000 daddr=2.2.2.2 dport=80\n\n");
   exit();
}


function print_header (msg:string)
{
    printf("%-20s  %-20s  %-20s  %-20s  %-20s  %-20s  %-20s  %-20s  %-20s  %-20s\n","IP PROTO","SRC or DST ADDRESS","SRC or DST PORT","Source Address",
                    "Dest Address","Source Port","Dest Port","match function","filter function","INTERVAL(sec)")
    printf("%-20s  %-20s  %-20s  %-20s  %-20s  %-20s  %-20s  %-20s  %-20s  %-20d\n",WatchIpproto,addr,port,saddr,daddr,sport,dport,match,filter,interval)
}

function process_cmdline:long ()
{
    for (i=1; i <= argc; i++) {
        argument = tokenize(argv[i], "=")
       if (argument == "help") {
           usage("Usage");
           exit();
         }
       else if (argument == "all") {
           WatchIpproto = "ALL";
           continue;
         }
       else if (argument == "tcp") {
           WatchIpproto = "TCP";
           continue;
         }
       else if (argument == "udp") {
           WatchIpproto = "UDP";
           continue;
         }
       else if (argument == "icmp") {
           WatchIpproto = "ICMP";
           continue;
        }
       else if (argument == "addr") {
           argv[i]="";
           addr = tokenize(argv[i], "=");
           continue;
        }
       else if (argument == "port") {
           argv[i]="";
           port=tokenize(argv[i], "=");
           continue;
        }
       else if (argument == "saddr") {
           argv[i]="";
           saddr=tokenize(argv[i], "=");
           continue;
         }
       else if (argument == "daddr") {
           argv[i]="";
           daddr=tokenize(argv[i], "=");
           continue;
        }
       else if (argument == "sport") {
           argv[i]="";
           sport=tokenize(argv[i], "=");
           continue;
        }
       else if (argument == "dport") {
           argv[i]="";
           dport=tokenize(argv[i], "=");
           continue;
        }
       else if (argument == "match") {
           argv[i]="";
           match=tokenize(argv[i], "=");
           continue;
        }
       else if (argument == "filter") {
           argv[i]="";
           filter=tokenize(argv[i], "=");
           continue;
        }
      else if (argument == "interval") {
           argv[i]="";
           interval=strtol(tokenize(argv[i], "="),10);
           continue;
        }
      else if (argument == "bt") {
            BackTrace=1;
        }
      else
        usage("process cmdline fail")

    }

    print_header("");
}

probe begin
{
//    printf("param num:%s,%d\r\n",@#,argc)
    if (@1 == "all" || @1 == "tcp" || @1 =="udp" || @1 == "icmp" || @1 =="help") {
         process_cmdline();
    }
    else {
       usage("error cmdline");

    }

   printf("Monitoring for dropped packets\n")
}
probe end { printf("Stopping dropped packet monitor\n") }
# increment a drop counter for every location we drop at
probe kernel.trace("kfree_skb") {

   if(WatchIpproto != "ALL")
   {
      skb_ip_proto=get_skb_ipproto($skb)
      if(skb_ip_proto != WatchIpproto)
         next
   }

   if(addr != "all")
   {
      skb_src_ip=get_skb_saddr($skb)
      skb_dst_ip=get_skb_daddr($skb)
      if(addr != skb_src_ip && addr != skb_dst_ip)
        next
   }
   if(port != "all")
   {
      skb_src_port=get_skb_sport($skb)
      skb_dst_port=get_skb_dport($skb)
      if(port != skb_src_port && port != skb_dst_port)
        next
   }
   if(saddr != "all")
   {
     skb_src_ip=get_skb_saddr($skb)
     if(saddr != skb_src_ip)
        next
   }
   if(daddr != "all")
   {
      skb_dst_ip=get_skb_daddr($skb)
      if(daddr != skb_dst_ip)
        next
   }
   if(sport != "all")
   {
      skb_src_port=get_skb_sport($skb)
      if(sport != skb_src_port)
        next
   }
   if(dport != "all")
   {
      skb_dst_port=get_skb_dport($skb)
      if(dport != skb_dst_port)
        next
   }
 //   locations[$location] <<< 1 //systemtap Statistical aggregate
   if(match == "all" && filter== "none")
   {
      if(BackTrace)
        kfree_skb_stack[backtrace()] <<< 1
      else
        kfree_skb_location[$location] <<< 1
   }
   else if(symname($location)==match)
   {
     if(BackTrace)
        kfree_skb_stack[backtrace()] <<< 1
     else
        kfree_skb_location[$location] <<< 1
   }
   else if(symname($location)!=filter && filter!="none")
  {
    if(BackTrace)
        kfree_skb_stack[backtrace()] <<< 1
     else
        kfree_skb_location[$location] <<< 1
  }


}
#Default every 5 seconds report our drop locations
probe timer.sec(1)
{
   if(++timeout != interval)
     next

   timeout=0
   printf("\n====== %s ======\n", ctime(gettimeofday_s()))
   if(BackTrace)
   {
    // add a single plus (+) or minus (-) operator after the VAR or the ARRAY 
     //identifier, the iteration order will be sorted by the ascending or descending 
     //index or value.
     foreach (bt in kfree_skb_stack-) {
     
        printf("%d packets dropped at stack:\n",@count(kfree_skb_stack[bt]))
        print_syms(bt)
      }
     delete kfree_skb_stack
   }
   else
   {
     foreach (lt in kfree_skb_location-) {
       printf("%d packets dropped at %s\n",@count(kfree_skb_location[lt]), symname(lt))

     }
     delete kfree_skb_location
   }

}
### trace_net_drop.stp ends ###