CHEN Yihui hace 5 años
padre
commit
f37dac2d4f
Se han modificado 12 ficheros con 756 adiciones y 1 borrados
  1. 145 0
      e/closure.cc
  2. 13 0
      e/data.py
  3. 17 0
      e/data.txt
  4. 19 0
      img/1.gv
  5. 89 0
      img/1.svg
  6. 26 0
      img/2.gv
  7. 150 0
      img/2.svg
  8. 13 0
      img/3.gv
  9. 56 0
      img/3.svg
  10. 25 0
      img/4.gv
  11. 188 0
      img/4.svg
  12. 15 1
      report.md

+ 145 - 0
e/closure.cc

@@ -0,0 +1,145 @@
+#include <mpi.h>
+#include <omp.h>
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+#include <vector>
+
+#define OMP_THREADS 2
+
+// Returns the exact integer square root of a perfect square.
+//
+// main() uses this to turn the MPI process count into the side length of the
+// square process grid.
+//
+// Parameters:
+//   n - a non-negative perfect square.
+// Returns:
+//   r such that r * r == n.
+//
+// Invalid input (negative, or not a perfect square) trips an assert. When
+// asserts are compiled out (NDEBUG) the function still returns a defined
+// value instead of running off the end: -1 for negative n, floor(sqrt(n))
+// otherwise. Callers that must be robust in such builds should verify
+// r * r == n themselves.
+inline int sqrti(int n) {
+  // std::sqrt of a negative value is NaN, and converting NaN to int is
+  // undefined behaviour, so reject it before doing any arithmetic.
+  assert(n >= 0 && "sqrti: argument must be non-negative");
+  if (n < 0) {
+    return -1;
+  }
+  // A double represents every int exactly, so the truncated root is exactly
+  // floor(sqrt(n)) and the check below can be done in integer arithmetic
+  // rather than with a floating-point tolerance.
+  const int r = static_cast<int>(std::sqrt(static_cast<double>(n)));
+  assert(r * r == n && "sqrti: argument is not a perfect square");
+  return r;
+}
+
+// Non-owning view of a square int matrix stored in a caller-provided buffer.
+//
+// The same n_ * n_ buffer can be addressed in two layouts:
+//  - row-major, through operator[]: element (i, j) is data_[i * n_ + j];
+//  - block-major, through split_map(): the matrix is cut into
+//    sqrt_q x sqrt_q square blocks that are stored one after another
+//    (block-row by block-row), each block being row-major internally.
+// The wrapper never allocates or frees data_.
+class MatWrap {
+ private:
+  int* data_;
+  int n_;
+
+ public:
+  // Creates an empty view (null buffer, dimension 0) so that a
+  // default-constructed wrapper has a defined state.
+  MatWrap() : data_(nullptr), n_(0) {}
+
+  // Wraps `data`, which must hold at least n * n ints and outlive the view.
+  MatWrap(int* data, int n) : data_(data), n_(n) {}
+
+  // Row-major access: returns a pointer to the first element of row `n`.
+  int* operator[](size_t n) const { return data_ + n * n_; }
+
+  // Block-major access: returns the address of logical element (i, j) when
+  // the buffer is laid out as sqrt_q x sqrt_q contiguous blocks.
+  //
+  // Requires sqrt_q > 0 and sqrt_q <= n_; otherwise the block size would be
+  // zero or negative and the index arithmetic below would divide by zero or
+  // produce meaningless offsets.
+  int* split_map(int sqrt_q, int i, int j) const {
+    assert(sqrt_q > 0 && "split_map: sqrt_q must be positive");
+    const int sub_n = n_ / sqrt_q;  // side length of one block
+    assert(sub_n > 0 && "split_map: matrix smaller than the block grid");
+    // (block row * blocks per row + block column) selects the block; the
+    // remainder terms locate the element inside that block.
+    return data_ + (i / sub_n * sqrt_q + j / sub_n) * sub_n * sub_n +
+           i % sub_n * sub_n + j % sub_n;
+  }
+
+  // Prints the matrix to stdout, one row per line, each value in a 5-wide
+  // field. With mapping == false the buffer is read row-major; with
+  // mapping == true it is read through split_map(sqrt_q, ...), in which case
+  // a positive sqrt_q must be supplied (the -1 default is not usable there).
+  void print(bool mapping = false, int sqrt_q = -1) const {
+    for (int i = 0; i < n_; ++i) {
+      for (int j = 0; j < n_; ++j) {
+        if (mapping) {
+          printf("%5d", *(split_map(sqrt_q, i, j)));
+        } else {
+          printf("%5d", operator[](i)[j]);
+        }
+      }
+      printf("\n");
+    }
+  }
+
+  // Needs n_ to check that its three operands have the same dimension.
+  friend void MatMultAdd(const MatWrap& a, const MatWrap& b, MatWrap& c);
+};
+
+// Accumulates the product a * b into c, collapsing every partial result to
+// 0 or 1: after each inner step c[i][j] holds
+// static_cast<bool>(c[i][j] + a[i][k] * b[k][j]). For matrices whose entries
+// are 0/1 this is a boolean (OR-of-ANDs) multiply-accumulate.
+//
+// All three operands must have the same dimension (asserted). Rows of c are
+// distributed over OMP_THREADS OpenMP threads; every thread writes only its
+// own rows of c.
+void MatMultAdd(const MatWrap& a, const MatWrap& b, MatWrap& c) {
+  assert(a.n_ == b.n_);
+  assert(a.n_ == c.n_);
+  const int dim = a.n_;
+#pragma omp parallel for num_threads(OMP_THREADS)
+  for (int row = 0; row < dim; ++row) {
+    int* const out_row = c[row];
+    const int* const lhs_row = a[row];
+    for (int col = 0; col < dim; ++col) {
+      for (int mid = 0; mid < dim; ++mid) {
+        // Written back on every step (not kept in a local) so the sequence of
+        // loads and stores matches a plain triple loop exactly.
+        const int term = lhs_row[mid] * b[mid][col];
+        out_row[col] = static_cast<bool>(out_row[col] + term);
+      }
+    }
+  }
+}
+
+// Repeatedly squares the (reflexive) adjacency matrix read from data.txt using
+// a block-distributed multiply on a sqrt_q x sqrt_q grid of MPI processes,
+// printing the gathered matrix on rank 0 after every round.
+//
+// Requirements checked at start-up (the job is aborted with a message on
+// failure): the process count is a perfect square, data.txt can be opened and
+// parsed, and the matrix dimension n is positive and a multiple of sqrt_q.
+//
+// Returns 0 on success; on a start-up failure MPI_Abort terminates the job.
+int main(int argc, char** argv) {
+  MPI_Init(&argc, &argv);
+  int rank = 0;
+  int size = 0;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+  // The processes are arranged as a sqrt_q x sqrt_q grid, so the process
+  // count has to be a perfect square. Checked explicitly so the program also
+  // fails cleanly when asserts are compiled out.
+  const int sqrt_q = sqrti(size);
+  if (sqrt_q <= 0 || sqrt_q * sqrt_q != size) {
+    if (rank == 0) {
+      fprintf(stderr, "process count %d is not a perfect square\n", size);
+    }
+    MPI_Abort(MPI_COMM_WORLD, 1);
+    return 1;
+  }
+
+  int n = 0;
+  // Whole matrix in block-major layout (see MatWrap::split_map). Only rank 0
+  // fills it; on the other ranks it stays empty because MPI ignores the root
+  // buffer arguments of MPI_Scatter / MPI_Gather there.
+  std::vector<int> mat_a;
+  if (rank == 0) {
+    // load matrix data
+    FILE* fp = fopen("data.txt", "rt");
+    if (fp == nullptr) {
+      fprintf(stderr, "cannot open data.txt\n");
+      MPI_Abort(MPI_COMM_WORLD, 1);
+      return 1;
+    }
+    bool ok = fscanf(fp, "%d", &n) == 1 && n > 0 && n % sqrt_q == 0;
+    if (ok) {
+      mat_a.resize(static_cast<size_t>(n) * n);
+      MatWrap ma(mat_a.data(), n);
+      for (int i = 0; ok && i < n; ++i) {
+        for (int j = 0; ok && j < n; ++j) {
+          ok = fscanf(fp, "%d", ma.split_map(sqrt_q, i, j)) == 1;
+        }
+        // Force the diagonal to 1 so that squaring keeps shorter paths.
+        *ma.split_map(sqrt_q, i, i) = 1;
+      }
+    }
+    fclose(fp);
+    if (!ok) {
+      fprintf(stderr,
+              "data.txt is malformed or its dimension is not a positive "
+              "multiple of %d\n",
+              sqrt_q);
+      MPI_Abort(MPI_COMM_WORLD, 1);
+      return 1;
+    }
+  }
+
+  // broadcast matrix size (already validated on rank 0)
+  MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  const int sub_n = n / sqrt_q;
+  const int block_len = sub_n * sub_n;
+  const size_t block_bytes = static_cast<size_t>(block_len) * sizeof(int);
+
+  // Per-process blocks; vectors release their storage automatically.
+  std::vector<int> sub_mat_a(block_len);
+  std::vector<int> sub_mat_b(block_len);
+  std::vector<int> sub_mat_c(block_len);
+  std::vector<int> sub_mat_comm(block_len);
+  MatWrap sub_comm(sub_mat_comm.data(), sub_n);
+  MatWrap sub_b(sub_mat_b.data(), sub_n);
+  MatWrap sub_c(sub_mat_c.data(), sub_n);
+
+  // Split the world into column and row communicators once; the grid does
+  // not change between rounds. Within col_world a process is ranked by its
+  // grid row, within row_world by its grid column.
+  const int col_rank = rank % sqrt_q;
+  const int row_rank = rank / sqrt_q;
+  MPI_Comm col_world;
+  MPI_Comm row_world;
+  MPI_Comm_split(MPI_COMM_WORLD, col_rank, row_rank, &col_world);
+  MPI_Comm_split(MPI_COMM_WORLD, row_rank, col_rank, &row_world);
+
+  // Rounds run for k = 0 .. floor(log2(n)); n > 0 is guaranteed above, so the
+  // truncating conversion is a floor.
+  const int last_round = static_cast<int>(std::log2f(static_cast<float>(n)));
+  for (int k = 0; k <= last_round; ++k) {
+    // init sub_c
+    memset(sub_mat_c.data(), 0, block_bytes);
+    // Distribute the current matrix: rank r receives block r. Both operands
+    // of the squaring start from the same block, so one scatter plus a local
+    // copy replaces a second identical scatter.
+    MPI_Scatter(mat_a.data(), block_len, MPI_INT, sub_mat_a.data(), block_len,
+                MPI_INT, 0, MPI_COMM_WORLD);
+    memcpy(sub_mat_b.data(), sub_mat_a.data(), block_bytes);
+    // compute
+    for (int i = 0; i < sqrt_q; ++i) {
+      // In step i the process in grid column (row_rank + i) % sqrt_q
+      // broadcasts its A block along its grid row.
+      const int send_root = (row_rank + i) % sqrt_q;
+      if (col_rank == send_root) {
+        memcpy(sub_mat_comm.data(), sub_mat_a.data(), block_bytes);
+      }
+      MPI_Bcast(sub_mat_comm.data(), block_len, MPI_INT, send_root, row_world);
+      // calculate sub mat gemm
+      MatMultAdd(sub_comm, sub_b, sub_c);
+      // Shift the B blocks one position up their grid column (cyclically).
+      MPI_Sendrecv_replace(sub_mat_b.data(), block_len, MPI_INT,
+                           (row_rank + sqrt_q - 1) % sqrt_q, 1,
+                           (row_rank + 1) % sqrt_q, 1, col_world,
+                           MPI_STATUS_IGNORE);
+    }
+    // Gather the result back into mat_a on rank 0; it is the input of the
+    // next round's scatter.
+    MPI_Gather(sub_mat_c.data(), block_len, MPI_INT, mat_a.data(), block_len,
+               MPI_INT, 0, MPI_COMM_WORLD);
+    // print result
+    if (rank == 0) {
+      MatWrap mc(mat_a.data(), n);
+      printf("loop:%d\n", k);
+      mc.print(true, sqrt_q);
+    }
+  }
+
+  MPI_Comm_free(&col_world);
+  MPI_Comm_free(&row_world);
+  MPI_Finalize();
+  return 0;
+}

+ 13 - 0
e/data.py

@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+import numpy as np
+import sys
+n = int(sys.argv[1])
+
+f = open('data.txt', 'wt')
+f.write('%d\n'%(n))
+
+a = np.random.random(size=(n, n)) * 0.6
+a = np.rint(a)
+
+np.savetxt(f, a, fmt='%d')
+f.close()

+ 17 - 0
e/data.txt

@@ -0,0 +1,17 @@
+16
+0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
+0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0
+0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0
+0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
+0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0
+1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0
+0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1
+0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0
+0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0

+ 19 - 0
img/1.gv

@@ -0,0 +1,19 @@
+// Undirected star graph drawn inside a single cluster box: a hub node
+// labelled "LSTM" is joined to six nodes labelled it, ft, gt, ot, ct and ht.
+// NOTE(review): presumably these are the per-step LSTM gate/state
+// computations split out as separate tasks -- confirm against report.md.
+graph
+   {
+   subgraph cluster01
+   {
+   n00[label="LSTM"];
+   n01[label="it"];
+   n02[label="ft"];
+   n03[label="gt"];
+   n04[label="ot"];
+   n05[label="ct"];
+   n06[label="ht"];
+   // One edge from the hub to every task node.
+   n00--n01;
+   n00--n02;
+   n00--n03;
+   n00--n04;
+   n00--n05;
+   n00--n06;
+   }
+   }

+ 89 - 0
img/1.svg

@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.40.1 (20161225.0304)
+ -->
+<!-- Title: %3 Pages: 1 -->
+<svg width="454pt" height="148pt"
+ viewBox="0.00 0.00 454.00 148.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 144)">
+<title>%3</title>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-144 450,-144 450,4 -4,4"/>
+<g id="clust1" class="cluster">
+<title>cluster01</title>
+<polygon fill="none" stroke="#000000" points="8,-8 8,-132 438,-132 438,-8 8,-8"/>
+</g>
+<!-- n00 -->
+<g id="node1" class="node">
+<title>n00</title>
+<ellipse fill="none" stroke="#000000" cx="223" cy="-106" rx="38.9931" ry="18"/>
+<text text-anchor="middle" x="223" y="-102.3" font-family="Times,serif" font-size="14.00" fill="#000000">LSTM</text>
+</g>
+<!-- n01 -->
+<g id="node2" class="node">
+<title>n01</title>
+<ellipse fill="none" stroke="#000000" cx="43" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="43" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">it</text>
+</g>
+<!-- n00&#45;&#45;n01 -->
+<g id="edge1" class="edge">
+<title>n00&#45;&#45;n01</title>
+<path fill="none" stroke="#000000" d="M191.4421,-95.2404C162.0115,-84.9541 117.0575,-68.5957 79,-52 74.2601,-49.9331 69.2525,-47.5571 64.5278,-45.2234"/>
+</g>
+<!-- n02 -->
+<g id="node3" class="node">
+<title>n02</title>
+<ellipse fill="none" stroke="#000000" cx="115" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="115" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">ft</text>
+</g>
+<!-- n00&#45;&#45;n02 -->
+<g id="edge2" class="edge">
+<title>n00&#45;&#45;n02</title>
+<path fill="none" stroke="#000000" d="M200.6223,-91.0816C180.9747,-77.9832 152.7037,-59.1358 134.1003,-46.7336"/>
+</g>
+<!-- n03 -->
+<g id="node4" class="node">
+<title>n03</title>
+<ellipse fill="none" stroke="#000000" cx="187" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="187" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">gt</text>
+</g>
+<!-- n00&#45;&#45;n03 -->
+<g id="edge3" class="edge">
+<title>n00&#45;&#45;n03</title>
+<path fill="none" stroke="#000000" d="M214.1011,-88.2022C208.4377,-76.8753 201.143,-62.2859 195.5486,-51.0972"/>
+</g>
+<!-- n04 -->
+<g id="node5" class="node">
+<title>n04</title>
+<ellipse fill="none" stroke="#000000" cx="259" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="259" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">ot</text>
+</g>
+<!-- n00&#45;&#45;n04 -->
+<g id="edge4" class="edge">
+<title>n00&#45;&#45;n04</title>
+<path fill="none" stroke="#000000" d="M231.8989,-88.2022C237.5623,-76.8753 244.857,-62.2859 250.4514,-51.0972"/>
+</g>
+<!-- n05 -->
+<g id="node6" class="node">
+<title>n05</title>
+<ellipse fill="none" stroke="#000000" cx="331" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="331" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">ct</text>
+</g>
+<!-- n00&#45;&#45;n05 -->
+<g id="edge5" class="edge">
+<title>n00&#45;&#45;n05</title>
+<path fill="none" stroke="#000000" d="M245.3777,-91.0816C265.0253,-77.9832 293.2963,-59.1358 311.8997,-46.7336"/>
+</g>
+<!-- n06 -->
+<g id="node7" class="node">
+<title>n06</title>
+<ellipse fill="none" stroke="#000000" cx="403" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="403" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">ht</text>
+</g>
+<!-- n00&#45;&#45;n06 -->
+<g id="edge6" class="edge">
+<title>n00&#45;&#45;n06</title>
+<path fill="none" stroke="#000000" d="M254.5579,-95.2404C283.9885,-84.9541 328.9425,-68.5957 367,-52 371.7399,-49.9331 376.7475,-47.5571 381.4722,-45.2234"/>
+</g>
+</g>
+</svg>

+ 26 - 0
img/2.gv

@@ -0,0 +1,26 @@
+// Directed graph over the same seven nodes as 1.gv. Nodes are declared inside
+// the cluster; the edges are declared outside it and each carries a label
+// naming what flows along it.
+// NOTE(review): presumably this is the data-dependency / communication view
+// of one LSTM step -- confirm against report.md.
+digraph
+   {
+   subgraph cluster01
+   {
+   n00[label="LSTM"];
+   n01[label="it"];
+   n02[label="ft"];
+   n03[label="gt"];
+   n04[label="ot"];
+   n05[label="ct"];
+   n06[label="ht"];
+   }
+   // LSTM node feeds it, ft, gt and ot (labels pair a w* name with xt).
+   n00 -> n01[label="wi,xt"];
+   n00 -> n02[label="wf,xt"];
+   n00 -> n03[label="wg,xt"];
+   n00 -> n04[label="wo,xt"];
+   // it, ft and gt feed ct.
+   n01 -> n05[label="it"];
+   n02 -> n05[label="ft"];
+   n03 -> n05[label="gt"];
+   // ot and ct feed ht.
+   n04 -> n06[label="ot"];
+   n05 -> n06[label="ct"];
+   // ht feeds back into it, ft, gt and ot.
+   n06 -> n01[label="ht"];
+   n06 -> n02[label="ht"];
+   n06 -> n03[label="ht"];
+   n06 -> n04[label="ht"];
+   }

+ 150 - 0
img/2.svg

@@ -0,0 +1,150 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.40.1 (20161225.0304)
+ -->
+<!-- Title: %3 Pages: 1 -->
+<svg width="370pt" height="576pt"
+ viewBox="0.00 0.00 370.00 576.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 572)">
+<title>%3</title>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-572 366,-572 366,4 -4,4"/>
+<g id="clust1" class="cluster">
+<title>cluster01</title>
+<polygon fill="none" stroke="#000000" points="8,-8 8,-560 354,-560 354,-8 8,-8"/>
+</g>
+<!-- n00 -->
+<g id="node1" class="node">
+<title>n00</title>
+<ellipse fill="none" stroke="#000000" cx="152" cy="-534" rx="38.9931" ry="18"/>
+<text text-anchor="middle" x="152" y="-530.3" font-family="Times,serif" font-size="14.00" fill="#000000">LSTM</text>
+</g>
+<!-- n01 -->
+<g id="node2" class="node">
+<title>n01</title>
+<ellipse fill="none" stroke="#000000" cx="195" cy="-409" rx="27" ry="18"/>
+<text text-anchor="middle" x="195" y="-405.3" font-family="Times,serif" font-size="14.00" fill="#000000">it</text>
+</g>
+<!-- n00&#45;&gt;n01 -->
+<g id="edge1" class="edge">
+<title>n00&#45;&gt;n01</title>
+<path fill="none" stroke="#000000" d="M158.1609,-516.0905C165.3134,-495.2984 177.1918,-460.7681 185.5219,-436.5527"/>
+<polygon fill="#000000" stroke="#000000" points="188.9072,-437.4709 188.8506,-426.8762 182.2879,-435.1938 188.9072,-437.4709"/>
+<text text-anchor="middle" x="193" y="-467.8" font-family="Times,serif" font-size="14.00" fill="#000000">wi,xt</text>
+</g>
+<!-- n02 -->
+<g id="node3" class="node">
+<title>n02</title>
+<ellipse fill="none" stroke="#000000" cx="301" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="301" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">ft</text>
+</g>
+<!-- n00&#45;&gt;n02 -->
+<g id="edge2" class="edge">
+<title>n00&#45;&gt;n02</title>
+<path fill="none" stroke="#000000" d="M188.2484,-527.1929C234.8954,-517.3778 310,-497.5326 310,-471.5 310,-471.5 310,-471.5 310,-284 310,-203.585 305.292,-108.7143 302.6862,-62.3648"/>
+<polygon fill="#000000" stroke="#000000" points="306.1694,-61.9691 302.1036,-52.1855 299.1808,-62.3692 306.1694,-61.9691"/>
+<text text-anchor="middle" x="328" y="-280.3" font-family="Times,serif" font-size="14.00" fill="#000000">wf,xt</text>
+</g>
+<!-- n03 -->
+<g id="node4" class="node">
+<title>n03</title>
+<ellipse fill="none" stroke="#000000" cx="54" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="54" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">gt</text>
+</g>
+<!-- n00&#45;&gt;n03 -->
+<g id="edge3" class="edge">
+<title>n00&#45;&gt;n03</title>
+<path fill="none" stroke="#000000" d="M114.0145,-529.7565C79.5194,-523.5427 34,-508.3598 34,-471.5 34,-471.5 34,-471.5 34,-96.5 34,-84.3757 37.4563,-71.5308 41.4953,-60.6597"/>
+<polygon fill="#000000" stroke="#000000" points="44.7728,-61.8907 45.2859,-51.3083 38.2855,-59.2611 44.7728,-61.8907"/>
+<text text-anchor="middle" x="54" y="-280.3" font-family="Times,serif" font-size="14.00" fill="#000000">wg,xt</text>
+</g>
+<!-- n04 -->
+<g id="node5" class="node">
+<title>n04</title>
+<ellipse fill="none" stroke="#000000" cx="163" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="163" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">ot</text>
+</g>
+<!-- n00&#45;&gt;n04 -->
+<g id="edge4" class="edge">
+<title>n00&#45;&gt;n04</title>
+<path fill="none" stroke="#000000" d="M133.4559,-517.9037C122.2047,-506.2932 110,-489.5409 110,-471.5 110,-471.5 110,-471.5 110,-320.5 110,-217.5766 68.0107,-180.0509 116,-89 122.2223,-77.1943 131.3757,-65.7141 139.9554,-56.3228"/>
+<polygon fill="#000000" stroke="#000000" points="142.5974,-58.6235 146.9504,-48.9641 137.5238,-53.8006 142.5974,-58.6235"/>
+<text text-anchor="middle" x="129" y="-280.3" font-family="Times,serif" font-size="14.00" fill="#000000">wo,xt</text>
+</g>
+<!-- n05 -->
+<g id="node6" class="node">
+<title>n05</title>
+<ellipse fill="none" stroke="#000000" cx="195" cy="-284" rx="27" ry="18"/>
+<text text-anchor="middle" x="195" y="-280.3" font-family="Times,serif" font-size="14.00" fill="#000000">ct</text>
+</g>
+<!-- n01&#45;&gt;n05 -->
+<g id="edge5" class="edge">
+<title>n01&#45;&gt;n05</title>
+<path fill="none" stroke="#000000" d="M195,-390.8239C195,-370.2723 195,-336.5472 195,-312.4893"/>
+<polygon fill="#000000" stroke="#000000" points="198.5001,-312.198 195,-302.198 191.5001,-312.198 198.5001,-312.198"/>
+<text text-anchor="middle" x="200.5" y="-342.8" font-family="Times,serif" font-size="14.00" fill="#000000">it</text>
+</g>
+<!-- n02&#45;&gt;n05 -->
+<g id="edge6" class="edge">
+<title>n02&#45;&gt;n05</title>
+<path fill="none" stroke="#000000" d="M296.0777,-51.7549C291.9635,-66.0298 285.7048,-86.5153 279,-104 257.1382,-161.0111 248.1228,-173.8113 222,-229 217.5885,-238.32 212.6465,-248.4679 208.2084,-257.4804"/>
+<polygon fill="#000000" stroke="#000000" points="204.9341,-256.206 203.6392,-266.7214 211.209,-259.3086 204.9341,-256.206"/>
+<text text-anchor="middle" x="269" y="-155.3" font-family="Times,serif" font-size="14.00" fill="#000000">ft</text>
+</g>
+<!-- n03&#45;&gt;n05 -->
+<g id="edge7" class="edge">
+<title>n03&#45;&gt;n05</title>
+<path fill="none" stroke="#000000" d="M60.4959,-51.5533C62.9634,-57.7835 65.9249,-64.7881 69,-71 103.764,-141.225 152.9449,-219.4967 178.357,-258.708"/>
+<polygon fill="#000000" stroke="#000000" points="175.6014,-260.8905 183.9907,-267.3612 181.4677,-257.0711 175.6014,-260.8905"/>
+<text text-anchor="middle" x="135" y="-155.3" font-family="Times,serif" font-size="14.00" fill="#000000">gt</text>
+</g>
+<!-- n06 -->
+<g id="node7" class="node">
+<title>n06</title>
+<ellipse fill="none" stroke="#000000" cx="209" cy="-159" rx="27" ry="18"/>
+<text text-anchor="middle" x="209" y="-155.3" font-family="Times,serif" font-size="14.00" fill="#000000">ht</text>
+</g>
+<!-- n04&#45;&gt;n06 -->
+<g id="edge8" class="edge">
+<title>n04&#45;&gt;n06</title>
+<path fill="none" stroke="#000000" d="M169.4601,-51.5546C177.1505,-72.4524 190.0782,-107.582 199.0485,-131.958"/>
+<polygon fill="#000000" stroke="#000000" points="195.7686,-133.1797 202.5069,-141.3557 202.3379,-130.7622 195.7686,-133.1797"/>
+<text text-anchor="middle" x="195.5" y="-92.8" font-family="Times,serif" font-size="14.00" fill="#000000">ot</text>
+</g>
+<!-- n05&#45;&gt;n06 -->
+<g id="edge9" class="edge">
+<title>n05&#45;&gt;n06</title>
+<path fill="none" stroke="#000000" d="M197.0357,-265.8239C199.3479,-245.1798 203.1488,-211.2431 205.8455,-187.1648"/>
+<polygon fill="#000000" stroke="#000000" points="209.3269,-187.5255 206.9618,-177.198 202.3704,-186.7463 209.3269,-187.5255"/>
+<text text-anchor="middle" x="210" y="-217.8" font-family="Times,serif" font-size="14.00" fill="#000000">ct</text>
+</g>
+<!-- n06&#45;&gt;n01 -->
+<g id="edge10" class="edge">
+<title>n06&#45;&gt;n01</title>
+<path fill="none" stroke="#000000" d="M227.8045,-172.0899C241.6228,-183.3931 258,-201.0939 258,-221.5 258,-346.5 258,-346.5 258,-346.5 258,-366.5191 241.7696,-382.3177 225.7848,-393.0227"/>
+<polygon fill="#000000" stroke="#000000" points="223.6943,-390.2005 217.0343,-398.4404 227.3791,-396.1521 223.6943,-390.2005"/>
+<text text-anchor="middle" x="266" y="-280.3" font-family="Times,serif" font-size="14.00" fill="#000000">ht</text>
+</g>
+<!-- n06&#45;&gt;n02 -->
+<g id="edge11" class="edge">
+<title>n06&#45;&gt;n02</title>
+<path fill="none" stroke="#000000" d="M221.0274,-142.6584C236.7995,-121.2289 264.6108,-83.4419 282.9236,-58.5603"/>
+<polygon fill="#000000" stroke="#000000" points="285.8609,-60.4741 288.9697,-50.3456 280.2232,-56.3247 285.8609,-60.4741"/>
+<text text-anchor="middle" x="267" y="-92.8" font-family="Times,serif" font-size="14.00" fill="#000000">ht</text>
+</g>
+<!-- n06&#45;&gt;n03 -->
+<g id="edge12" class="edge">
+<title>n06&#45;&gt;n03</title>
+<path fill="none" stroke="#000000" d="M191.3635,-145.3231C177.1555,-134.2617 156.7023,-118.2406 139,-104 118.6468,-87.6269 95.8049,-68.7897 78.9295,-54.7835"/>
+<polygon fill="#000000" stroke="#000000" points="81.0894,-52.0276 71.1616,-48.3277 76.6153,-57.4111 81.0894,-52.0276"/>
+<text text-anchor="middle" x="147" y="-92.8" font-family="Times,serif" font-size="14.00" fill="#000000">ht</text>
+</g>
+<!-- n06&#45;&gt;n04 -->
+<g id="edge13" class="edge">
+<title>n06&#45;&gt;n04</title>
+<path fill="none" stroke="#000000" d="M191.9618,-144.5643C181.3987,-134.4409 168.7774,-119.913 163,-104 158.2371,-90.8812 157.7475,-75.4453 158.6557,-62.4114"/>
+<polygon fill="#000000" stroke="#000000" points="162.1712,-62.4303 159.6843,-52.1316 155.206,-61.7334 162.1712,-62.4303"/>
+<text text-anchor="middle" x="171" y="-92.8" font-family="Times,serif" font-size="14.00" fill="#000000">ht</text>
+</g>
+</g>
+</svg>

+ 13 - 0
img/3.gv

@@ -0,0 +1,13 @@
+// Undirected graph with four nodes: a single node labelled "it,ct,ht" joined
+// to ft, gt and ot. Compared with 1.gv, it, ct and ht share one node here,
+// matching the report's note that ct and ht are merged with it.
+graph
+   {
+   subgraph cluster01
+   {
+   n01[label="it,ct,ht"];
+   n02[label="ft"];
+   n03[label="gt"];
+   n04[label="ot"];
+   n01--n02;
+   n01--n03;
+   n01--n04;
+   }
+   }

+ 56 - 0
img/3.svg

@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.40.1 (20161225.0304)
+ -->
+<!-- Title: %3 Pages: 1 -->
+<svg width="238pt" height="148pt"
+ viewBox="0.00 0.00 238.00 148.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 144)">
+<title>%3</title>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-144 234,-144 234,4 -4,4"/>
+<g id="clust1" class="cluster">
+<title>cluster01</title>
+<polygon fill="none" stroke="#000000" points="8,-8 8,-132 222,-132 222,-8 8,-8"/>
+</g>
+<!-- n01 -->
+<g id="node1" class="node">
+<title>n01</title>
+<ellipse fill="none" stroke="#000000" cx="115" cy="-106" rx="42.4939" ry="18"/>
+<text text-anchor="middle" x="115" y="-102.3" font-family="Times,serif" font-size="14.00" fill="#000000">it,ct,ht</text>
+</g>
+<!-- n02 -->
+<g id="node2" class="node">
+<title>n02</title>
+<ellipse fill="none" stroke="#000000" cx="43" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="43" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">ft</text>
+</g>
+<!-- n01&#45;&#45;n02 -->
+<g id="edge1" class="edge">
+<title>n01&#45;&#45;n02</title>
+<path fill="none" stroke="#000000" d="M98.3008,-89.3008C86.1705,-77.1705 69.8894,-60.8894 58.1374,-49.1374"/>
+</g>
+<!-- n03 -->
+<g id="node3" class="node">
+<title>n03</title>
+<ellipse fill="none" stroke="#000000" cx="115" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="115" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">gt</text>
+</g>
+<!-- n01&#45;&#45;n03 -->
+<g id="edge2" class="edge">
+<title>n01&#45;&#45;n03</title>
+<path fill="none" stroke="#000000" d="M115,-87.8314C115,-77 115,-63.2876 115,-52.4133"/>
+</g>
+<!-- n04 -->
+<g id="node4" class="node">
+<title>n04</title>
+<ellipse fill="none" stroke="#000000" cx="187" cy="-34" rx="27" ry="18"/>
+<text text-anchor="middle" x="187" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">ot</text>
+</g>
+<!-- n01&#45;&#45;n04 -->
+<g id="edge3" class="edge">
+<title>n01&#45;&#45;n04</title>
+<path fill="none" stroke="#000000" d="M131.6992,-89.3008C143.8295,-77.1705 160.1106,-60.8894 171.8626,-49.1374"/>
+</g>
+</g>
+</svg>

+ 25 - 0
img/4.gv

@@ -0,0 +1,25 @@
+// Same four-node graph as 3.gv, with three extra leaf nodes attached to each
+// of the four nodes (m10..m12, m20..m22, m30..m32, m40..m42). The m* nodes
+// are not declared explicitly; they are created by their first use in an edge
+// and therefore display their identifier as the label.
+// NOTE(review): presumably the m* nodes are the workers each task is mapped
+// onto -- confirm against the mapping section of report.md.
+graph
+   {
+   subgraph cluster01
+   {
+   n01[label="it,ct,ht"];
+   n02[label="ft"];
+   n03[label="gt"];
+   n04[label="ot"];
+   n01--n02;
+   n01--n03;
+   n01--n04;
+   // Three leaves under the merged it,ct,ht node.
+   n01--m10;
+   n01--m11;
+   n01--m12;
+   // Three leaves under ft.
+   n02--m20;
+   n02--m21;
+   n02--m22;
+   // Three leaves under gt.
+   n03--m30;
+   n03--m31;
+   n03--m32;
+   // Three leaves under ot.
+   n04--m40;
+   n04--m41;
+   n04--m42;
+   }
+   }

+ 188 - 0
img/4.svg

@@ -0,0 +1,188 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.40.1 (20161225.0304)
+ -->
+<!-- Title: %3 Pages: 1 -->
+<svg width="742pt" height="220pt"
+ viewBox="0.00 0.00 742.00 220.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 216)">
+<title>%3</title>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-216 738,-216 738,4 -4,4"/>
+<g id="clust1" class="cluster">
+<title>cluster01</title>
+<polygon fill="none" stroke="#000000" points="8,-8 8,-204 726,-204 726,-8 8,-8"/>
+</g>
+<!-- n01 -->
+<g id="node1" class="node">
+<title>n01</title>
+<ellipse fill="none" stroke="#000000" cx="489" cy="-178" rx="42.4939" ry="18"/>
+<text text-anchor="middle" x="489" y="-174.3" font-family="Times,serif" font-size="14.00" fill="#000000">it,ct,ht</text>
+</g>
+<!-- n02 -->
+<g id="node2" class="node">
+<title>n02</title>
+<ellipse fill="none" stroke="#000000" cx="167" cy="-106" rx="27" ry="18"/>
+<text text-anchor="middle" x="167" y="-102.3" font-family="Times,serif" font-size="14.00" fill="#000000">ft</text>
+</g>
+<!-- n01&#45;&#45;n02 -->
+<g id="edge1" class="edge">
+<title>n01&#45;&#45;n02</title>
+<path fill="none" stroke="#000000" d="M451.4323,-169.5998C385.2388,-154.7987 249.4773,-124.4421 192.7756,-111.7635"/>
+</g>
+<!-- n03 -->
+<g id="node3" class="node">
+<title>n03</title>
+<ellipse fill="none" stroke="#000000" cx="373" cy="-106" rx="27" ry="18"/>
+<text text-anchor="middle" x="373" y="-102.3" font-family="Times,serif" font-size="14.00" fill="#000000">gt</text>
+</g>
+<!-- n01&#45;&#45;n03 -->
+<g id="edge2" class="edge">
+<title>n01&#45;&#45;n03</title>
+<path fill="none" stroke="#000000" d="M464.9647,-163.0816C443.5963,-149.8184 412.7312,-130.6607 392.7661,-118.2686"/>
+</g>
+<!-- n04 -->
+<g id="node4" class="node">
+<title>n04</title>
+<ellipse fill="none" stroke="#000000" cx="451" cy="-106" rx="27" ry="18"/>
+<text text-anchor="middle" x="451" y="-102.3" font-family="Times,serif" font-size="14.00" fill="#000000">ot</text>
+</g>
+<!-- n01&#45;&#45;n04 -->
+<g id="edge3" class="edge">
+<title>n01&#45;&#45;n04</title>
+<path fill="none" stroke="#000000" d="M479.6067,-160.2022C473.6286,-148.8753 465.9287,-134.2859 460.0235,-123.0972"/>
+</g>
+<!-- m10 -->
+<g id="node5" class="node">
+<title>m10</title>
+<ellipse fill="none" stroke="#000000" cx="527" cy="-106" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="527" y="-102.3" font-family="Times,serif" font-size="14.00" fill="#000000">m10</text>
+</g>
+<!-- n01&#45;&#45;m10 -->
+<g id="edge4" class="edge">
+<title>n01&#45;&#45;m10</title>
+<path fill="none" stroke="#000000" d="M498.3933,-160.2022C504.3274,-148.9586 511.9582,-134.5003 517.8459,-123.3446"/>
+</g>
+<!-- m11 -->
+<g id="node6" class="node">
+<title>m11</title>
+<ellipse fill="none" stroke="#000000" cx="607" cy="-106" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="607" y="-102.3" font-family="Times,serif" font-size="14.00" fill="#000000">m11</text>
+</g>
+<!-- n01&#45;&#45;m11 -->
+<g id="edge5" class="edge">
+<title>n01&#45;&#45;m11</title>
+<path fill="none" stroke="#000000" d="M513.4497,-163.0816C534.6465,-150.1479 565.0296,-131.609 585.3591,-119.2046"/>
+</g>
+<!-- m12 -->
+<g id="node7" class="node">
+<title>m12</title>
+<ellipse fill="none" stroke="#000000" cx="687" cy="-106" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="687" y="-102.3" font-family="Times,serif" font-size="14.00" fill="#000000">m12</text>
+</g>
+<!-- n01&#45;&#45;m12 -->
+<g id="edge6" class="edge">
+<title>n01&#45;&#45;m12</title>
+<path fill="none" stroke="#000000" d="M523.2375,-167.3212C555.4683,-157.0283 604.9388,-140.6094 647,-124 652.0997,-121.9862 657.4949,-119.6813 662.616,-117.4084"/>
+</g>
+<!-- m20 -->
+<g id="node8" class="node">
+<title>m20</title>
+<ellipse fill="none" stroke="#000000" cx="47" cy="-34" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="47" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">m20</text>
+</g>
+<!-- n02&#45;&#45;m20 -->
+<g id="edge7" class="edge">
+<title>n02&#45;&#45;m20</title>
+<path fill="none" stroke="#000000" d="M146.9147,-93.9488C125.2971,-80.9783 91.0283,-60.417 68.7767,-47.066"/>
+</g>
+<!-- m21 -->
+<g id="node9" class="node">
+<title>m21</title>
+<ellipse fill="none" stroke="#000000" cx="127" cy="-34" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="127" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">m21</text>
+</g>
+<!-- n02&#45;&#45;m21 -->
+<g id="edge8" class="edge">
+<title>n02&#45;&#45;m21</title>
+<path fill="none" stroke="#000000" d="M157.5206,-88.937C151.2366,-77.626 143.0157,-62.8282 136.6893,-51.4407"/>
+</g>
+<!-- m22 -->
+<g id="node10" class="node">
+<title>m22</title>
+<ellipse fill="none" stroke="#000000" cx="207" cy="-34" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="207" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">m22</text>
+</g>
+<!-- n02&#45;&#45;m22 -->
+<g id="edge9" class="edge">
+<title>n02&#45;&#45;m22</title>
+<path fill="none" stroke="#000000" d="M176.4794,-88.937C182.7634,-77.626 190.9843,-62.8282 197.3107,-51.4407"/>
+</g>
+<!-- m30 -->
+<g id="node11" class="node">
+<title>m30</title>
+<ellipse fill="none" stroke="#000000" cx="287" cy="-34" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="287" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">m30</text>
+</g>
+<!-- n03&#45;&#45;m30 -->
+<g id="edge10" class="edge">
+<title>n03&#45;&#45;m30</title>
+<path fill="none" stroke="#000000" d="M356.0092,-91.7751C341.2259,-79.3984 319.8729,-61.5215 304.8437,-48.9389"/>
+</g>
+<!-- m31 -->
+<g id="node12" class="node">
+<title>m31</title>
+<ellipse fill="none" stroke="#000000" cx="367" cy="-34" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="367" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">m31</text>
+</g>
+<!-- n03&#45;&#45;m31 -->
+<g id="edge11" class="edge">
+<title>n03&#45;&#45;m31</title>
+<path fill="none" stroke="#000000" d="M371.4859,-87.8314C370.5833,-77 369.4406,-63.2876 368.5344,-52.4133"/>
+</g>
+<!-- m32 -->
+<g id="node13" class="node">
+<title>m32</title>
+<ellipse fill="none" stroke="#000000" cx="447" cy="-34" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="447" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">m32</text>
+</g>
+<!-- n03&#45;&#45;m32 -->
+<g id="edge12" class="edge">
+<title>n03&#45;&#45;m32</title>
+<path fill="none" stroke="#000000" d="M388.3328,-91.0816C400.7791,-78.9717 418.2757,-61.9479 430.9189,-49.6465"/>
+</g>
+<!-- m40 -->
+<g id="node14" class="node">
+<title>m40</title>
+<ellipse fill="none" stroke="#000000" cx="527" cy="-34" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="527" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">m40</text>
+</g>
+<!-- n04&#45;&#45;m40 -->
+<g id="edge13" class="edge">
+<title>n04&#45;&#45;m40</title>
+<path fill="none" stroke="#000000" d="M466.7472,-91.0816C479.6168,-78.8893 497.7441,-61.7161 510.7486,-49.3961"/>
+</g>
+<!-- m41 -->
+<g id="node15" class="node">
+<title>m41</title>
+<ellipse fill="none" stroke="#000000" cx="607" cy="-34" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="607" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">m41</text>
+</g>
+<!-- n04&#45;&#45;m41 -->
+<g id="edge14" class="edge">
+<title>n04&#45;&#45;m41</title>
+<path fill="none" stroke="#000000" d="M472.6319,-95.0052C477.3501,-92.6573 482.3288,-90.2177 487,-88 488.1641,-87.4473 547.3429,-60.8287 582.1478,-45.176"/>
+</g>
+<!-- m42 -->
+<g id="node16" class="node">
+<title>m42</title>
+<ellipse fill="none" stroke="#000000" cx="687" cy="-34" rx="31.3957" ry="18"/>
+<text text-anchor="middle" x="687" y="-30.3" font-family="Times,serif" font-size="14.00" fill="#000000">m42</text>
+</g>
+<!-- n04&#45;&#45;m42 -->
+<g id="edge15" class="edge">
+<title>n04&#45;&#45;m42</title>
+<path fill="none" stroke="#000000" d="M471.9462,-94.4625C476.7927,-92.0944 481.9978,-89.7835 487,-88 555.6558,-63.5221 577.8592,-75.0726 647,-52 652.3455,-50.2162 657.9429,-47.9596 663.1999,-45.6527"/>
+</g>
+</g>
+</svg>

+ 15 - 1
report.md

@@ -450,7 +450,7 @@ end for
 
 考虑`ct,ht`工作量较少,且需要传输较多数据,将`ct,ht`和`it`合并
 
-![a](3.svg)
+![a](img/3.svg)
 
 4)映射
 
@@ -606,10 +606,24 @@ void sigmoid(float *x, float *y, int n) {
 
 ##### 改进
 
+* 优化了数据传输,使用`MPI_Bcast, MPI_Scatter, MPI_Gather`替换了`for-MPI_Send-MPI_Recv`
 
+* 使用管道重定向输入输出
+* 自动化性能测试脚本
+
+*完整代码见附件*
 
 ### closure-Hybrid-omp-mpi
 
+| 问题规模 | OpenMP线程数 | MPI进程数 | 总运行时间(sec) |
+| -------- | ------------ | --------- | --------------- |
+| 64       | 1            | 1         | 0.492           |
+| 128      | 2            | 1         | 1.725           |
+| 256      | 4            | 1         |                 |
+| 256      | 1            | 4         | 6.681           |
+| 512      | 2            | 4         | 46.498          |
+| 1024     | 4            | 4         |                 |
+
 ### gauss-MPI
 
 ##### 性能结果