How the JDK Solves the Problem of Inaccurate Container CPU Counts

Posted by lwk on December 22, 2021

Recently a colleague asked me why Java's API can correctly report a container's CPU count while many other languages cannot. I had run into this before while troubleshooting a production issue: since most of our company's tech stack is Java, we used Java to query the container's CPU count, and the value it returned was indeed the number of CPUs allocated to the container rather than the host's CPU count. So the question arises: how does Java solve the problem of inaccurate CPU counts? In this post we'll go down to the JDK source code and analyze how Java obtains a container's CPU count accurately.

Let's start with the Java API for querying the container's CPU count: Runtime.getRuntime().availableProcessors();
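
As a quick demonstration, here is a minimal program (the class name CpuCount is just illustrative). Run inside a container on a container-aware JDK (container support shipped in JDK 10 and was backported to 8u191), it prints the cgroup-limited count rather than the host's:

public class CpuCount {
    public static void main(String[] args) {
        // On a container-aware JDK this reflects the cgroup CPU limit,
        // not the number of processors on the host.
        System.out.println(Runtime.getRuntime().availableProcessors());
    }
}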

As we know, a Java process runs on top of a JVM; the JVM is part of the JDK, and its core is implemented in C++.

Under the hood, availableProcessors() ends up calling os::active_processor_count() in the JDK source:


int os::active_processor_count() {
  // User has overridden the number of active processors
  if (ActiveProcessorCount > 0) { // if -XX:ActiveProcessorCount=<n> was specified, return that value directly
    log_trace(os)("active_processor_count: "
                  "active processor count set by user : %d",
                  ActiveProcessorCount);
    return ActiveProcessorCount;
  }

  int active_cpus;
  if (OSContainer::is_containerized()) { // containerized? read the CPU limits from the container's cgroup
    active_cpus = OSContainer::active_processor_count();
    log_trace(os)("active_processor_count: determined by OSContainer: %d",
                   active_cpus);
  } else {
    active_cpus = os::Linux::active_processor_count(); // not containerized: query the host CPU count directly
  }

  return active_cpus;
}
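
Note the first branch: since JDK 10 you can override the detected value explicitly with the -XX:ActiveProcessorCount=<n> flag, in which case the JVM returns that value without doing any container detection at all.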

int OSContainer::active_processor_count() {
  assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
  return cgroup_subsystem->active_processor_count();
}  

int CgroupSubsystem::active_processor_count() {
  int quota_count = 0, share_count = 0;
  int cpu_count, limit_count;
  int result;

  // We use a cache with a timeout to avoid performing expensive
  // computations in the event this function is called frequently.
  // [See 8227006].
  CachingCgroupController* contrl = cpu_controller();
  CachedMetric* cpu_limit = contrl->metrics_cache();
  if (!cpu_limit->should_check_metric()) {
    int val = (int)cpu_limit->value();
    log_trace(os, container)("CgroupSubsystem::active_processor_count (cached): %d", val);
    return val;
  }

  cpu_count = limit_count = os::Linux::active_processor_count(); // start from the host CPU count
  int quota  = cpu_quota();  // read cpu.cfs_quota_us from the cgroup
  int period = cpu_period(); // read cpu.cfs_period_us
  int share  = cpu_shares(); // read cpu.shares

  if (quota > -1 && period > 0) { // a quota is set and the period is valid
    quota_count = ceilf((float)quota / (float)period); // quota / period, rounded up
    log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count);
  }
  if (share > -1) { // shares are set
    share_count = ceilf((float)share / (float)PER_CPU_SHARES); // share / PER_CPU_SHARES (1024), rounded up
    log_trace(os, container)("CPU Share count based on shares: %d", share_count);
  }

  // If both shares and quotas are setup results depend
  // on flag PreferContainerQuotaForCPUCount.
  // If true, limit CPU count to quota
  // If false, use minimum of shares and quotas
  if (quota_count != 0 && share_count != 0) {
    if (PreferContainerQuotaForCPUCount) { // if this JVM flag is true, use quota_count
      limit_count = quota_count;
    } else {
      limit_count = MIN2(quota_count, share_count); // take the smaller of the two
    }
  } else if (quota_count != 0) {
    limit_count = quota_count;
  } else if (share_count != 0) {
    limit_count = share_count;
  }

  result = MIN2(cpu_count, limit_count); // never exceed the host CPU count
  log_trace(os, container)("OSContainer::active_processor_count: %d", result);

  // Update cached metric to avoid re-reading container settings too often
  cpu_limit->set_value(result, OSCONTAINER_CACHE_TIMEOUT);

  return result;
}
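
To make the decision rule easier to follow, here is a rough Java transcription of the arithmetic above. It is only a sketch, not JDK code: limitedCpuCount is a made-up name and the preferQuota parameter stands in for the PreferContainerQuotaForCPUCount flag:

// Sketch of the decision rule in CgroupSubsystem::active_processor_count.
class CpuLimitSketch {
    static final int PER_CPU_SHARES = 1024;

    static int limitedCpuCount(int hostCpus, long quota, long period,
                               long shares, boolean preferQuota) {
        int quotaCount = 0, shareCount = 0;
        if (quota > -1 && period > 0) {
            quotaCount = (int) Math.ceil((double) quota / period);          // round up
        }
        if (shares > -1) {
            shareCount = (int) Math.ceil((double) shares / PER_CPU_SHARES); // round up
        }
        int limit = hostCpus;
        if (quotaCount != 0 && shareCount != 0) {
            limit = preferQuota ? quotaCount : Math.min(quotaCount, shareCount);
        } else if (quotaCount != 0) {
            limit = quotaCount;
        } else if (shareCount != 0) {
            limit = shareCount;
        }
        return Math.min(hostCpus, limit); // never exceed the host CPU count
    }
}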


int CgroupV1Subsystem::cpu_quota() { // read cpu.cfs_quota_us from the cgroup
  GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.cfs_quota_us",
                     "CPU Quota is: %d", "%d", quota);
  return quota;
} 
  
int CgroupV1Subsystem::cpu_period() { // read cpu.cfs_period_us from the cgroup
  GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.cfs_period_us",
                     "CPU Period is: %d", "%d", period);
  return period;
}   

int CgroupV1Subsystem::cpu_shares() { // read cpu.shares from the cgroup
  GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.shares",
                     "CPU Shares is: %d", "%d", shares);
  // Convert 1024 to no shares setup
  if (shares == 1024) return -1;

  return shares;
}
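
GET_CONTAINER_INFO is a HotSpot helper macro that reads and parses the corresponding file under the cgroup mount point. You can inspect the same cgroup v1 values from plain Java; the sketch below (readCgroupValue is a hypothetical helper) hard-codes the conventional /sys/fs/cgroup/cpu mount point for brevity, whereas the JVM resolves the real location from /proc/self/mountinfo and /proc/self/cgroup:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

class CgroupReaderSketch {
    // Reads one cgroup v1 CPU value, e.g. readCgroupValue("cpu.cfs_quota_us").
    // The mount point below is the conventional default, assumed for brevity;
    // the JVM discovers the actual location from /proc/self/mountinfo.
    static long readCgroupValue(String file) throws IOException {
        Path p = Path.of("/sys/fs/cgroup/cpu/" + file);
        return Long.parseLong(Files.readString(p).trim());
    }
}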

void os::Linux::initialize_system_info() {
  set_processor_count(sysconf(_SC_NPROCESSORS_CONF)); // number of processors configured on the host
  if (processor_count() == 1) { 
    pid_t pid = os::Linux::gettid();
    char fname[32];
    jio_snprintf(fname, sizeof(fname), "/proc/%d", pid);
    FILE *fp = fopen(fname, "r");
    if (fp == NULL) {
      unsafe_chroot_detected = true;
    } else {
      fclose(fp);
    }    
  }
  _physical_memory = (julong)sysconf(_SC_PHYS_PAGES) * (julong)sysconf(_SC_PAGESIZE);
  assert(processor_count() > 0, "linux error");
}

// Get the host's CPU count
int os::Linux::active_processor_count() {
  cpu_set_t cpus;  // can represent at most 1024 (CPU_SETSIZE) processors
  cpu_set_t* cpus_p = &cpus;
  int cpus_size = sizeof(cpu_set_t);

  int configured_cpus = os::processor_count();  // upper bound on available cpus
  int cpu_count = 0;

// old build platforms may not support dynamic cpu sets
#ifdef CPU_ALLOC

  // To enable easy testing of the dynamic path on different platforms we
  // introduce a diagnostic flag: UseCpuAllocPath
  if (configured_cpus >= CPU_SETSIZE || UseCpuAllocPath) {
    // kernel may use a mask bigger than cpu_set_t
    log_trace(os)("active_processor_count: using dynamic path %s"
                  "- configured processors: %d",
                  UseCpuAllocPath ? "(forced) " : "",
                  configured_cpus);
    cpus_p = CPU_ALLOC(configured_cpus);
    if (cpus_p != NULL) {
      cpus_size = CPU_ALLOC_SIZE(configured_cpus);
      // zero it just to be safe
      CPU_ZERO_S(cpus_size, cpus_p);
    }
    else {
       // failed to allocate so fallback to online cpus
       int online_cpus = ::sysconf(_SC_NPROCESSORS_ONLN);
       log_trace(os)("active_processor_count: "
                     "CPU_ALLOC failed (%s) - using "
                     "online processor count: %d",
                     os::strerror(errno), online_cpus);
       return online_cpus;
    }
  }
  else {
    log_trace(os)("active_processor_count: using static path - configured processors: %d",
                  configured_cpus);
  }
#else // CPU_ALLOC
// these stubs won't be executed
#define CPU_COUNT_S(size, cpus) -1
#define CPU_FREE(cpus)

  log_trace(os)("active_processor_count: only static path available - configured processors: %d",
                configured_cpus);
#endif // CPU_ALLOC

  // pid 0 means the current thread - which we have to assume represents the process
  if (sched_getaffinity(0, cpus_size, cpus_p) == 0) {
    if (cpus_p != &cpus) { // can only be true when CPU_ALLOC used
      cpu_count = CPU_COUNT_S(cpus_size, cpus_p);
    }
    else {
      cpu_count = CPU_COUNT(cpus_p);
    }
    log_trace(os)("active_processor_count: sched_getaffinity processor count: %d", cpu_count);
  }
  else {
    cpu_count = ::sysconf(_SC_NPROCESSORS_ONLN);
    warning("sched_getaffinity failed (%s)- using online processor count (%d) "
            "which may exceed available processors", os::strerror(errno), cpu_count);
  }

  if (cpus_p != &cpus) { // can only be true when CPU_ALLOC used
    CPU_FREE(cpus_p);
  }

  assert(cpu_count > 0 && cpu_count <= os::processor_count(), "sanity check");
  return cpu_count;
}

// os::processor_count()
static int processor_count() {
  return _processor_count;
}
static void set_processor_count(int count) { _processor_count = count; }

set_processor_count() is called from os::Linux::initialize_system_info() at startup, which is where _processor_count gets initialized.

Let's look at a concrete example. The figure below shows the cgroup settings of a pod, including cpu.shares=512, cpu.cfs_quota_us=100000, and cpu.cfs_period_us=100000.

[Figure: the pod's cgroup CPU settings]

Following the JDK source, quota_count = ceil(100000 / 100000) = 1 and share_count = ceil(512 / 1024) = 1, so by the minimum rule the container's CPU count is 1.
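
Feeding these numbers into the limitedCpuCount sketch from earlier reproduces that result:

// quota = 100000, period = 100000 -> quotaCount = 1
// shares = 512                    -> shareCount = 1
// hostCpus = 8 is an arbitrary stand-in for the host machine
int cpus = CpuLimitSketch.limitedCpuCount(8, 100000, 100000, 512, true);
// cpus == 1, whichever branch the quota/shares logic takes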

In summary, the source shows that the JDK obtains a container's CPU count (and likewise its memory limit) from the container's cgroup settings. That is also why runtimes that rely only on sysconf or sched_getaffinity report the host's CPU count or the affinity mask instead: neither interface reflects the cgroup CFS quota.