本文通过阅读libvirt源码作为入口,分析了GHashTable数据结构以及初始化和插入过程。
在阅读libvirt代码的define一个domain的时候发现有下面一段代码:
static virDomainObjPtr
virDomainObjListFindByUUIDLocked(virDomainObjListPtr doms,
const unsigned char *uuid)
{
char uuidstr[VIR_UUID_STRING_BUFLEN];
virDomainObjPtr obj;
virUUIDFormat(uuid, uuidstr);
obj = virHashLookup(doms->objs, uuidstr);
if (obj) {
virObjectRef(obj);
virObjectLock(obj);
}
return obj;
}
其中有一个virHashLookup()函数。
再看下virDomainObjListPtr的结构,如下所示:
typedef struct _virDomainObjList virDomainObjList;
typedef virDomainObjList *virDomainObjListPtr;
struct _virDomainObjList {
virObjectRWLockable parent;
/* uuid string -> virDomainObj mapping
* for O(1), lockless lookup-by-uuid */
GHashTable *objs;
/* name -> virDomainObj mapping for O(1),
* lockless lookup-by-name */
GHashTable *objsName;
};
其中有个GHashTable成员,并且成员名是objs,刚好是virHashLookup()函数的其中一个参数。通过查找资料&深入到代码的底层发现,GHashTable是个hashtable数据结构,并且该数据结构主要是glib库中实现的。那接下来看下virHashLookup()函数的定义:
void *
virHashLookup(GHashTable *table,
const char *name)
{
if (!table || !name)
return NULL;
return g_hash_table_lookup(table, name);
}
每个宿主机都在daemon启动初始化驱动的时候初始化全局的GHashTable变量。函数调用流程图如下所示:
接下来重点看g_hash_table_new_full()函数
函数位置位于glib的glib/ghash.c文件中,注意是glib,不是glibc
GHashTable *
g_hash_table_new_full (GHashFunc hash_func,
GEqualFunc key_equal_func,
GDestroyNotify key_destroy_func,
GDestroyNotify value_destroy_func)
{
GHashTable *hash_table;
hash_table = g_slice_new (GHashTable);
g_atomic_ref_count_init (&hash_table->ref_count);
hash_table->nnodes = 0;
hash_table->noccupied = 0;
hash_table->hash_func = hash_func ? hash_func : g_direct_hash;
hash_table->key_equal_func = key_equal_func;
#ifndef G_DISABLE_ASSERT
hash_table->version = 0;
#endif
hash_table->key_destroy_func = key_destroy_func;
hash_table->value_destroy_func = value_destroy_func;
g_hash_table_setup_storage (hash_table);
return hash_table;
}
g_hash_table_new_full()函数就是创建并初始化一个hashtable出来,其中hash_func是一个函数,它为key创建一个hash值;key_equal_func用于比较两个key是否相等;key_destroy_func当你从hash表里删除、销毁一个条目时,glib库会自动调用它释放key所占用的内存空间,这对于key是动态分配内存的hash表来说非常有用;value_destroy_func的作用是释放value占用的内存空间;g_hash_table_setup_storage()函数主要是初始化hashtable大小、mod和mask等,具体代码如下:
static const gint prime_mod [] =
{
1, /* For 1 << 0 */
2,
3,
7,
13,
31,
61,
127,
……
}
static void
g_hash_table_setup_storage (GHashTable *hash_table)
{
gboolean small = FALSE;
/* We want to use small arrays only if:
* - we are running on a system where that makes sense (64 bit); and
* - we are not running under valgrind.
*/
#ifdef USE_SMALL_ARRAYS
small = TRUE;
# ifdef ENABLE_VALGRIND
if (RUNNING_ON_VALGRIND)
small = FALSE;
# endif
#endif
g_hash_table_set_shift (hash_table, HASH_TABLE_MIN_SHIFT);// HASH_TABLE_MIN_SHIFT=3
hash_table->have_big_keys = !small;
hash_table->have_big_values = !small;
hash_table->keys = g_hash_table_realloc_key_or_value_array (NULL, hash_table->size, hash_table->have_big_keys);
hash_table->values = hash_table->keys;
hash_table->hashes = g_new0 (guint, hash_table->size);
}
static void
g_hash_table_set_shift (GHashTable *hash_table, gint shift)
{
hash_table->size = 1 << shift;//shift=3,hash_table->size=8
hash_table->mod = prime_mod [shift];//prime_mode[shift]=7
/* hash_table->size is always a power of two, so we can calculate the mask
* by simply subtracting 1 from it. The leading assertion ensures that
* we're really dealing with a power of two. */
g_assert ((hash_table->size & (hash_table->size - 1)) == 0);
hash_table->mask = hash_table->size - 1; //mask=7
}
以上就完成了GHashTable的创建和初始化工作。接下来就是使用,包括查找、插入、删除和更新。我们先看下g_hash_table_lookup()
gpointer
g_hash_table_lookup (GHashTable *hash_table,
gconstpointer key)
{
guint node_index;
guint node_hash;
g_return_val_if_fail (hash_table != NULL, NULL);
node_index = g_hash_table_lookup_node (hash_table, key, &node_hash);
return HASH_IS_REAL (hash_table->hashes[node_index])
? g_hash_table_fetch_key_or_value (hash_table->values, node_index, hash_table->have_big_values)
: NULL;
}
static inline guint
g_hash_table_lookup_node (GHashTable *hash_table,
gconstpointer key,
guint *hash_return)
{
guint node_index;
guint node_hash;
guint hash_value;
guint first_tombstone = 0;
gboolean have_tombstone = FALSE;
guint step = 0;
hash_value = hash_table->hash_func (key);//根据hash_func()计算key对应的hash值
if (G_UNLIKELY (!HASH_IS_REAL (hash_value)))
hash_value = 2;
*hash_return = hash_value;
node_index = g_hash_table_hash_to_index (hash_table, hash_value);// g_hash_table_hash_to_index()函数实现(hash_value * 11) %hash_table->mod,从初始化可以知道hash_table->mod=7,比如10%7 =3,则这个下标为3
node_hash = hash_table->hashes[node_index];// 取出下标为3的hashs数组中的值,判断他是否被占用了
while (!HASH_IS_UNUSED (node_hash))//node_hash不为0进入循环体,不为0表示已经被占用;为0表示未被占用,直接返回node_index即可。
{
/* We first check if our full hash values
* are equal so we can avoid calling the full-blown
* key equality function in most cases.
*/
if (node_hash == hash_value)// //当hash值相等时,判断key是否相等,如果相等则返回下标值。
{
gpointer node_key = g_hash_table_fetch_key_or_value (hash_table->keys, node_index, hash_table->have_big_keys);
if (hash_table->key_equal_func)
{
if (hash_table->key_equal_func (node_key, key))//判断key是否相等,相等则返回node_index,这里主要看hash_table->key_equal_func是否存在,存在则使用hashtable的key比较函数,在libvirt代码里,这里使用的key比较函数是g_str_equal,即比较两字符串是否相等。
return node_index;
}
else if (node_key == key)
{
return node_index;
}
}
else if (HASH_IS_TOMBSTONE (node_hash) && !have_tombstone) //当hash值不相等是,判断是否为tombstone
{
first_tombstone = node_index;
have_tombstone = TRUE;
}
step++;
node_index += step;
node_index &= hash_table->mask;
node_hash = hash_table->hashes[node_index];
//重点说下step++至node_hash = hash_table->hashes[node_index];这几行代码,这几行代码其实就是随机选择node_index从[0, hash_table->mask]满足条件的位置,当然node_index不是从0开始的,而是采用了开放定址法(线性探测)算法进行计算,因此顺序可能是2,4,6,1,2,0,……,这样的顺序进行遍历。
}
if (have_tombstone)
return first_tombstone;
return node_index;
}
static inline guint
g_hash_table_hash_to_index (GHashTable *hash_table, guint hash)
{
/* Multiply the hash by a small prime before applying the modulo. This
* prevents the table from becoming densely packed, even with a poor hash
* function. A densely packed table would have poor performance on
* workloads with many failed lookups or a high degree of churn. */
return (hash * 11) % hash_table->mod;
}
g_hash_table_lookup_node()函数主要是接受一个key变量,他会返回这个key相对应的下标,利用这个下标node_index,可以方便的得到这个key对映的value和hash。
接下来我们看下GHashTable的插入过程,同样从libvirt代码作为入口开始。Libvirt源码文件src/util/virhash.c使用virHashAddEntry()函数进行GHashTable的插入操作。先看下libvirt调用GHashTable进行插入操作的流程:
int
virHashAddEntry(GHashTable *table, const char *name, void *userdata)
{
if (!table || !name)
return -1;
if (g_hash_table_contains(table, name)) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Duplicate hash table key '%s'"), name);//查找hastable是否已经存在name
return -1;
}
g_hash_table_insert(table, g_strdup(name), userdata);//执行插入操作,这里的key就是UUID;userdata就是vm对象
return 0;
}
因此g_hash_table_insert()函数的key,value分别就是UUID和vm对象
gboolean
g_hash_table_insert (GHashTable *hash_table,
gpointer key,
gpointer value)
{
return g_hash_table_insert_internal (hash_table, key, value, FALSE);
}
g_hash_table_insert_internal (GHashTable *hash_table,
gpointer key,
gpointer value,
gboolean keep_new_key)
{
guint key_hash;
guint node_index;
g_return_val_if_fail (hash_table != NULL, FALSE);
node_index = g_hash_table_lookup_node (hash_table, key, &key_hash);//查找key对应的index,具体逻辑详见上面的g_hash_table_lookup_node()函数
return g_hash_table_insert_node (hash_table, node_index, key_hash, key, value, keep_new_key, FALSE);
}
// keep_new_key=false,reusing_key=false
static gboolean
g_hash_table_insert_node (GHashTable *hash_table,
guint node_index,
guint key_hash,
gpointer new_key,
gpointer new_value,
gboolean keep_new_key,
gboolean reusing_key)
{
gboolean already_exists;
guint old_hash;
gpointer key_to_free = NULL;
gpointer key_to_keep = NULL;
gpointer value_to_free = NULL;
old_hash = hash_table->hashes[node_index];//根据node_index获取hash值
already_exists = HASH_IS_REAL (old_hash);//判断key是否存在hash表中
/* Proceed in three steps. First, deal with the key because it is the
* most complicated. Then consider if we need to split the table in
* two (because writing the value will result in the set invariant
* becoming broken). Then deal with the value.
*
* There are three cases for the key:
*
* - entry already exists in table, reusing key:
* free the just-passed-in new_key and use the existing value
*
* - entry already exists in table, not reusing key:
* free the entry in the table, use the new key
*
* - entry not already in table:
* use the new key, free nothing
*
* We update the hash at the same time...
*/
if (already_exists)
{
/* Note: we must record the old value before writing the new key
* because we might change the value in the event that the two
* arrays are shared.
*/
value_to_free = g_hash_table_fetch_key_or_value (hash_table->values, node_index, hash_table->have_big_values);//获取node_index所对应的value
if (keep_new_key)//是否使用新key
{
key_to_free = g_hash_table_fetch_key_or_value (hash_table->keys, node_index, hash_table->have_big_keys);
key_to_keep = new_key;
}
else
{
key_to_free = new_key;//不适用新key
key_to_keep = g_hash_table_fetch_key_or_value (hash_table->keys, node_index, hash_table->have_big_keys);
}
}
else //key不在hash表中
{
hash_table->hashes[node_index] = key_hash;
key_to_keep = new_key;
}
/* Resize key/value arrays and split table as necessary */
g_hash_table_ensure_keyval_fits (hash_table, key_to_keep, new_value);
g_hash_table_assign_key_or_value (hash_table->keys, node_index, hash_table->have_big_keys, key_to_keep);
/* Step 3: Actually do the write */
g_hash_table_assign_key_or_value (hash_table->values, node_index, hash_table->have_big_values, new_value);
/* Now, the bookkeeping... */
if (!already_exists)
{
hash_table->nnodes++;
if (HASH_IS_UNUSED (old_hash))
{
/* We replaced an empty node, and not a tombstone */
hash_table->noccupied++;
g_hash_table_maybe_resize (hash_table);
}
#ifndef G_DISABLE_ASSERT
hash_table->version++;
#endif
}
if (already_exists)
{
if (hash_table->key_destroy_func && !reusing_key)
(* hash_table->key_destroy_func) (key_to_free);//释放key
if (hash_table->value_destroy_func)
(* hash_table->value_destroy_func) (value_to_free);//释放value
}
return !already_exists;
}
以上就是对GHashTable的简要分析过程,主要是在阅读libvirt源码时看到有使用该数据结构,因此产生了兴趣,毕竟学习源码除了学习核心思想之外,另外就是学习一些关键的数据结构和算法了。另外,对于glib的hashtable阐述比较详细的文章参考http://blog.chinaunix.net/uid-24774106-id-3605760.html。