请注意:Note that if the data structure is not too slow, you are done! No need to do something fancy if something simple will work.
基准测试
完美扩展:perfect scaling 虽然工作量增多,但是并行执行后,完成任务的时间没有增加
1.3. 可拓展技术
懒惰计数器:sloppy counter
其实应该叫做...近似计数算法 approximate counter...中文版太辣了!
通过多个局部计数器和一个全局计数器来实现...每个局部计数器上有一个锁,全局计数器有一个。
基本思想:
The basic idea of approximate counting is as follows. When a thread running on a given core wishes to increment the counter, it increments its local counter; access to this local counter is synchronized via the corresponding local lock. Because each CPU has its own local counter, threads across CPUs can update local counters without contention, and thus updates to the counter are scalable.
这时候可以增加一个过手锁...hand-over-hand locking or 锁耦合 loack coupling
The idea is pretty simple. Instead of having a single lock for the entire list, you instead add a lock per node of the list. When traversing the list, the code first grabs the next node’s lock and then releases the current node’s lock (which inspires the name hand-over-hand).
更多并发不一定更快
并发机制过于复杂就会引入新的复杂度...
所以一般采用两种方案:
简单但少一点并发
复杂但多一点并发
通过性能 Test 来决定采用哪种方式...毕竟这无法作弊..
当心锁和控制流
注意控制流的变化导致函数返回和退出,或其他情况导致函数停止执行...
Because many functions will begin by acquiring a lock, allocating some memory, or doing other similar stateful operations, when errors arise, the code has to undo all of the state before returning, which is errorprone. Thus, it is best to structure code to minimize this pattern.
要在发生错误时 恢复各种状态,回到最初的样子~
3. 并发队列
为数据结构增加并发的方式:添加一把大锁
添加扩展性:每个小结点添加一把小锁
One trick used by Michael and Scott is to add a dummy node (allocated in the queue initialization code); this dummy enables the separation of head and tail operations. Study the code, or better yet, type it in, run it, and measure it, to understand how it works deeply.
队头锁 head + 队尾锁 tail...这样可以并发处理入队和出队问题
需要正确判断临界区(玩锁的一个秘诀)
4. 并发散列表
或者俗称的 Hash 表
这里说的散列表是固定大小的...
而且这里使用的底层数据结构是之前实现的 并发链表...
所以每个散列的键值都支持键值(可拓展性 max)
避免不成熟的优化
人话:先做对且垃圾的事情,再考虑优化
5. 作业(编码作业)
In this homework, you’ll gain some experience with writing concurrent code and measuring its performance. Learning to build code that performs well is a critical skill and thus gaining a little experience here with it is quite worthwhile.
typedef struct counter_t
{
int global;
pthread_mutex_t glock;
int local[NUMCPUS];
pthread_mutex_t llock[NUMCPUS];
int threshold;
} counter_t;
void init(counter_t *c, int threshold)
{
c->threshold = threshold;
c->global = 0;
pthread_mutex_init(&c->glock, NULL);
int i;
for (int i = 0; i < NUMCPUS; i++)
{
c->local[i] = 0;
pthread_mutex_init(&c->llock[i], NULL);
}
}
void update(counter_t *c, int threadID, int amt)
{
pthread_mutex_lock(&c->llock[threadID]);
c->local[threadID] += amt;
if (c->local[threadID] >= c->threshold)
{
pthread_mutex_lock(&c->glock);
c->global += c->local[threadID];
pthread_mutex_unlock(&c->glock);
c->local[threadID] = 0;
}
pthread_mutex_unlock(&c->llock[threadID]);
}
int get(counter_t *c)
{
pthread_mutex_lock(&c->glock);
int rc = c->global;
pthread_mutex_unlock(&c->glock);
return rc;
}
// basic node structure
typedef struct __node_t
{
int key;
struct __node_t *next;
} node_t;
// basic list structure (one used per list)
typedef struct __list_t
{
node_t *head;
pthread_mutex_t lock;
} list_t;
void List_Init(list_t *L)
{
L->head = NULL;
pthread_mutex_init(&L->lock, NULL);
}
// insert a new node at the head of the list
int List_Insert(list_t *L, int key)
{
pthread_mutex_lock(&L->lock);
node_t *new = malloc(sizeof(node_t));
if (new == NULL)
{
perror("malloc");
pthread_mutex_unlock(&L->lock);
return -1; // fail
}
new->key = key;
new->next = L->head;
L->head = new;
pthread_mutex_unlock(&L->lock);
return 0; // success
}
// remove the first node with the given key from the list
int List_Remove(list_t *L, int key)
{
pthread_mutex_lock(&L->lock);
node_t *prev = NULL;
node_t *curr = L->head;
while (curr)
{
if (curr->key == key)
{
if (prev)
{
prev->next = curr->next;
}
else
{
L->head = curr->next;
}
free(curr);
pthread_mutex_unlock(&L->lock);
return 0; // success
}
prev = curr;
curr = curr->next;
}
pthread_mutex_unlock(&L->lock);
return -1; // failure
}
// check if a node with the given key is in the list
int List_Lookup(list_t *L, int key)
{
pthread_mutex_lock(&L->lock);
node_t *curr = L->head;
while (curr)
{
if (curr->key == key)
{
pthread_mutex_unlock(&L->lock);
return 0; // success
}
curr = curr->next;
}
pthread_mutex_unlock(&L->lock);
return -1; // failure
}
void List_Init(list_t *L)
{
L->head = NULL;
pthread_mutex_init(&L->lock, NULL);
}
int List_Insert(list_t *L, int key)
{
// malloc: synchronization not needed
node_t *new = malloc(sizeof(node_t));
if (new == NULL)
{
perror("malloc");
return -1;
}
new->key = key;
// just lock critical section
pthread_mutex_lock(&L->lock);
new->next = L->head;
L->head = new;
pthread_mutex_unlock(&L->lock);
return 0; // success
}
int List_Lookup(list_t *L, int key)
{
int rv = -1;
pthread_mutex_lock(&L->lock);
node_t *curr = L->head;
while (curr)
{
if (curr->key == key)
{
rv = 0;
break;
}
curr = curr->next;
}
pthread_mutex_unlock(&L->lock);
return rv; // now both success and failure
}