1. 堆的简单应用
- 1.1. 大根堆的操作
- 1.2. 机器调度

1. 堆的简单应用

1.1. 大根堆的操作

问题描述：

实现判断大根堆、大根堆的初始化、大根堆的插入、大根堆的删除、堆排序的操作。

求解策略：

大根堆判断：这里可以采用非递归和递归两种方式进行判断。
- 非递归：采用层次遍历，依次判断每个节点和其左右孩子（如果有的话）的大小关系，如果该节点的值小于其其中一个孩子节点的值，则不是大根堆；否则，继续判断下一个节点，直到所有节点判断完成为止。可用一个队列存储一些节点的编号，这些节点满足：该节点还未进行判断，但其父节点已经判断过满足大根堆的定义。这样每次从队列中取出一个节点进行判断，直到队列为空为止。（代码中函数 IsMaxHeap() 就是采用的非递归实现的）
- 递归：对每一个节点，每次只判断该节点和其左右孩子（如果有的话）的大小关系，对其孩子节点，只判断其孩子节点和其孩子的左右孩子的大小关系，依此类推，直到所有节点判断完毕为止。（代码中函数 IsMaxHeap2() 就是采用递归实现的）
大根堆的初始化：对每一个非叶子节点，判断其与左右孩子中较大者的关系，如果该节点的值大于其左右孩子中的较大者，则不进行任何操作，否则，交换该节点与孩子中较大者的值，即是该节点的下沉过程。采用此方法，从最后一个非叶子节点开始调整，直到调整到根节点为止。
大根堆的插入：将新元素插入到数组最后，然后判断新元素与其父节点的关系，如果大于其父节点的值，则交换其与父节点的值，继续此操作，直到其小于其父节点的值，则插入元素完成。该过程即是新元素上浮的过程。
堆排序：每次将根元素与最后一个未排序的元素交换，然后从根结点调整，使未排序的元素成为一个最大堆。依此类推，直到所有元素都排序完成，产生一个从小到大的序列，排序过后的序列就不是一个堆了。此方法时间复杂度为 $O(nlogn)$。

/* 堆的应用：堆排序、机器调度、霍夫曼编码 */
#include <iostream>
#include <vector>
#include <queue>
using namespace std;

template<typename T>
ostream& operator<<(ostream& out, vector<T>& vec){
    for (int i = 0; i < vec.size(); ++i){
        out << vec[i] << ' ';
    }
    return out;
}
/* 大根堆堆的操作：判断是不是大根堆、堆的调整、堆的插入、堆的删除、堆排序，堆用数组表示。 */
/* 判断是不是大根堆 */
template<typename T>
bool IsMaxHeap(vector<T>& heap){        // 根据定义，判断每一个节点与其孩子的关系。非递归，采用层次遍历。
    if (heap.size() <= 1)
        return true;
    queue<int> q;                 // 存储将要判断的节点的编号
    int size = heap.size();
    int root = 0;
    while (root < size){
        if (2*root+1 < size){
            if (heap[root] >= heap[2*root+1])
                q.push(2*root+1);                                   // 将 root 的左孩子存在队列中
            else
                return false;
        }
        if (2*root+2 < size){
            if (heap[root] >= heap[2*root+2])
                q.push(2*root+2);                                   // 将 root 的右孩子存在队列中
            else
                return false;
        }
        if (q.empty())
            break;
        root = q.front();
        q.pop();
    }
    return true;
}

template<typename T>
bool IsMaxHeap2(vector<T>& heap, int root){             // 递归实现，每次只判断节点和其左右孩子的关系
    if (heap.size() <= 1)
        return true;
    if (root >= heap.size()){
        cout << root << " should not be >= " << heap.size() << endl;
        return false;
    }
    int size = heap.size();
    if (2*root+1 < size){
        if (heap[root] < heap[2*root+1])
            return false;
        else
            IsMaxHeap2(heap, 2*root+1);
    }
    if (2*root+2 < size){
        if (heap[root] < heap[2*root+2])
            return false;
        else
            IsMaxHeap2(heap, 2*root+2);
    }
    return true;
}

/* 大根堆的调整 */
template<typename T>
void MaxHeapAdjust(vector<T>& heap, int root, int length){      // 调整以节点 root 为根的子树，length 为调整到的最大编号
    while (2*root + 1 < length){
        int child = 2*root + 1;
        if (2*root + 2 < length && heap[2*root+2] > heap[2*root+1])
            child = 2*root + 2;             // 取以 root 节点为根的左右孩子值较大的那个
        if (heap[child] > heap[root])
            swap(heap[child], heap[root]);  // 如果孩子较大的那个比根还大，则交换它们
        root = child;                       // 根元素的下沉过程
    }
}

/* 大根堆插入 */
template<typename T>
void MaxHeapPush(vector<T>& heap, const T& element){        // 注意这里如果插入元素时没有剩余的空间，vector 会自动扩容
    if (!IsMaxHeap2(heap, 0)){                                 // 判断是不是堆，不是则返回
        cout << "error! not a heap." << endl;
        return;
    }
    heap.push_back(element);
    int size = heap.size();
    int currentNode = size - 1;
    while (currentNode != 0 && heap[currentNode] > heap[(currentNode-1)/2]){    // 插入的节点元素上浮
        swap(heap[currentNode], heap[(currentNode-1)/2]);
        currentNode = (currentNode-1)/2;
    }
}

/* 大根堆删除 */
template<typename T>
void MaxHeapPop(vector<T>& heap){           // 删除根元素
    if (heap.size() == 0)                   // 没有元素，直接返回
        return;
    if (!IsMaxHeap2(heap, 0)){                 // 判断是不是堆，不是则返回
        cout << "error! not a heap." << endl;
        return;
    }
    swap(heap[0], heap[heap.size()-1]);     // 交换第一个元素和最后一个元素
    heap.pop_back();                        // 删除最后一个元素
    MaxHeapAdjust(heap, 0, heap.size());    // 重新调整，使其成为大根堆
}

/* 大根堆排序 */
template<typename T>
void MaxHeapSort(vector<T>& heap){          // 从小到大排序
    for (int i = heap.size()/2-1; i >= 0; --i)      // 使输入成为一个堆
        MaxHeapAdjust(heap, i, heap.size());
    for (int i = heap.size() - 1; i > 0; --i){
        swap(heap[0], heap[i]);                     // 将第0个元素与最后一个未排序的元素交换
        MaxHeapAdjust(heap, 0, i);                  // 重新调整以根结点的堆，调整的长度为未排序的长度
    }
}

/* main 函数 */
int main(int argc, char** argv){
    int arr[] = {6, 9, 3, 8, 2, 4, 7, 1, 5};
    vector<int> heap(arr, arr + sizeof(arr)/sizeof(int));
    cout << "initial matrix: " << heap << endl;
    cout << "is heap: [method 1: " << IsMaxHeap(heap) << "]\t [method 2: " << IsMaxHeap2(heap, 0) << "]" << endl;
    cout << "----------------------------------------" << endl;
    for (int i = heap.size()/2-1; i >= 0; --i)      // 创建堆
        MaxHeapAdjust(heap, i, heap.size());
    cout << "initial heap: " << heap << endl;
    cout << "is heap: [method 1: " << IsMaxHeap(heap) << "]\t [method 2: " << IsMaxHeap2(heap, 0) << "]" << endl;
    cout << "----------------------------------------" << endl;
    MaxHeapPush(heap, 0);       // 插入元素
    cout << "heap push: " << heap << endl;
    cout << "is heap: [method 1: " << IsMaxHeap(heap) << "]\t [method 2: " << IsMaxHeap2(heap, 0) << "]" << endl;
    cout << "----------------------------------------" << endl;
    MaxHeapPop(heap);           // 删除元素
    cout << "heap pop: " << heap << endl;
    cout << "is heap: [method 1: " << IsMaxHeap(heap) << "]\t [method 2: " << IsMaxHeap2(heap, 0) << "]" << endl;
    cout << "----------------------------------------" << endl;
    MaxHeapSort(heap);          // 堆排序，堆排序后就不是一个堆了
    cout << "heap sort: " << heap << endl;
    cout << "is heap: [method 1: " << IsMaxHeap(heap) << "]\t [method 2: " << IsMaxHeap2(heap, 0) << "]" << endl;
    cout << "----------------------------------------" << endl;
    return 1;
}

程序执行结果为：

initial matrix: 6 9 3 8 2 4 7 1 5
is heap: [method 1: 0]   [method 2: 0]
----------------------------------------
initial heap: 9 8 7 6 2 4 3 1 5
is heap: [method 1: 1]   [method 2: 1]
----------------------------------------
heap push: 9 8 7 6 2 4 3 1 5 0
is heap: [method 1: 1]   [method 2: 1]
----------------------------------------
heap pop: 8 6 7 5 2 4 3 1 0
is heap: [method 1: 1]   [method 2: 1]
----------------------------------------
heap sort: 0 1 2 3 4 5 6 7 8
is heap: [method 1: 0]   [method 2: 0]

1.2. 机器调度

问题描述：

一个工厂具有 $m$ 台一模一样的机器。我们有 $n$ 个作业要处理。设作业 $i$ 的处理时间为 $t_i$，这个时间包括把作业放入机器和从机器上取下的时间。所谓 调度（schedule） 是指按作业在机器上的运行时间分配作业，使得；

一台机器在同一时间内只能处理一个作业；
一个作业不能同时在两台机器上处理；
一个作业 $i$ 的处理时间是 $t_i$ 个时间单位。

假如每台机器在 0 时刻都是可用的，完成时间（finish time） 或 调度长度（length of a schedule） 是指完成所有作业的时间。在一个非抢先调度中，一项作业 $i$ 在一台机器上处理，从时刻 $s_i$ 开始，到时刻 $s_i+t_i$ 结束。

例如：有 3 台机器要处理 7 个作业，这 7 个作业的处理时间分别为 $(2, 14, 4, 16, 6, 5, 3)$。3 台机器的编号为 $M1,M2,M3$。则最优的调度安排如下（作业编号从 0 开始）：（其完成全部作业所需时间为 17）

机器编号	任务编号	处理时间
$M1$	3	16 = 16
$M2$	1,6	14+3 = 17
$M3$	4,5,2,0	6+5+4+2 = 17

任务就是写一个程序，实现在 $m$ 台机器上执行 $n$ 个作业的最小完成时间的调度。

求解策略：

调度问题是 NP-复杂问题（NP 表示 nondeterministic polynornial） 中的一个。NP-复杂问题及 NP-完全问题是指尚未找到具有多项式时间复杂性算法的问题。NP-完全问题是一类判定问题，也就是说，对这类问题的每一个实例，答案为是或否。NP-复杂问题的优化问题经常用 近似算法（approximation aigorithm） 解决，虽然近似算法不能保证得到最优解，但是能保证得到近似最优解。

在本调度问题中，采用一个简单调度策略，称为 最长处理时间（longest processing time, LPT），它的调度长度是最优调度长度的 $4/3-1/(3m)$。在 $LPT$ 算法中，作业按处理时间的递减顺序排列。当一个作业需要分配时，总是分配给最先变为空闲的机器。

可用堆来建立 $LPT$ 调度方案，时间性能为 $O(nlogn)$。当 $n{\leq}m$ 时，只需要将作业 $i$ 在 $0$ ~ $t_i$ 时间分配给机器 $i$ 来处理。当 $n>m$ 时，可以首先利用堆排序将作业按处理时间递减顺序排列。为了建立 $LPT$ 调度方案，作业按处理时间从大到小顺序进行分配。为了决定将一个作业分配给哪一台机器，必须知道哪台机器最先空闲。为此，维持一个 $m$ 台机器的小根堆。

#include <iostream>
#include <vector>
#include <algorithm>        // 主要使用 make_heap(),push_heap(),pop_heap(),sort_heap() 函数
using namespace std;
/* 机器调度 */
/* 类 jobNode 表示作业 */
class jobNode{
    friend int makeSchedule(vector<jobNode>&, int, int);
    friend int main(int, char**);
    public:
        operator int () const {return time;}
    private:
        int id,             // job identifier
            time;           // processing time
};
/* 类 machineNode 表示机器 */
class machineNode{
    friend int makeSchedule(vector<jobNode>&, int, int);
    public:
        machineNode(int theID = 0, int theAvail = 0){
            id = theID;
            avail = theAvail;
        }
        operator int () const {return avail;}
    private:
        int id,             // machine identifier
            avail;          // when it becones free
};

int makeSchedule(vector<jobNode>& vec, int n, int m){           // n 表示任务数量，m 表示机器数量
    make_heap(vec.begin(), vec.end(), greater<jobNode>());
    sort_heap(vec.begin(), vec.end(), greater<jobNode>());              // 从大到小排序
    if (n <= m){
        cout << "Schedule each job on a different machine." << endl;
        return vec.front().time;
    }

    // 初始化 m 台机器，建立小根堆
    vector<machineNode> machineHeap(m);
    for (int i = 0; i < m; ++i)
        machineHeap[i] = machineNode(i, 0);
    make_heap(machineHeap.begin(), machineHeap.end(), greater<machineNode>());      // 建立小根堆

    // 生成调度计划
    for (int i = 0; i < n; ++i){         // 把作业 i 安排在第一台空闲的机器
        pop_heap(machineHeap.begin(), machineHeap.end(), greater<machineNode>());
        machineNode x = machineHeap.back();
        machineHeap.pop_back();
        cout << "Schedule job " << vec[i].id << " on machine " << x.id << " from " << x.avail << " to " << (x.avail + vec[i].time) << endl;
        x.avail += vec[i].time;
        machineHeap.push_back(x);
        push_heap(machineHeap.begin(), machineHeap.end(), greater<machineNode>());
    }
    make_heap(machineHeap.begin(), machineHeap.end(), less<machineNode>());     // 重建大根堆
    return machineHeap.front().avail;
}

/* main 函数 */
int main(int argc, char** argv){
    /* 机器调度 */
    int arrJob[] = {2, 14, 4, 16, 6, 5, 3};
    vector<jobNode> vecJob(7);
    for (int i = 0; i < 7; ++i){
        vecJob[i].id = i;
        vecJob[i].time = arrJob[i];
    }
    cout << "min finish time = " << makeSchedule(vecJob, 7, 3) << endl;
    return 1;
}

程序执行结果为：

Schedule job 3 on machine 2 from 0 to 16
Schedule job 1 on machine 1 from 0 to 14
Schedule job 4 on machine 0 from 0 to 6
Schedule job 5 on machine 0 from 6 to 11
Schedule job 2 on machine 0 from 11 to 15
Schedule job 6 on machine 1 from 14 to 17
Schedule job 0 on machine 0 from 15 to 17
min finish time = 17

时间复杂度：该方法时间复杂度为 $O(nlogn)$。