19 __qpu__ 
void qft_opt_swap(qreg q, 
int shouldSwap) {
 
   21   int nbQubits = q.size();
 
   22   for (
int qIdx = nbQubits - 1; qIdx >= 0; --qIdx) {
 
   23     auto shiftedBitIdx = qIdx + startIdx;
 
   25     for (
int j = qIdx - 1; j >= 0; --j) {
 
   26       const double theta = M_PI/std::pow(2.0, qIdx - j);
 
   27       auto targetIdx = j + startIdx;
 
   28       CPhase(q[shiftedBitIdx], q[targetIdx], theta);
 
   33   int swapCount = (shouldSwap == 0) ? 0 : 1;
 
   34   for (
int count = 0; count < swapCount; ++count) {
 
   35     for (
int qIdx = 0; qIdx < nbQubits/2; ++qIdx) {
 
   36       Swap(q[startIdx + qIdx], q[startIdx + nbQubits - qIdx - 1]);
 
   41 __qpu__ 
void qft_range_opt_swap(qreg q, 
int startIdx, 
int nbQubits, 
int shouldSwap) {
 
   42   for (
int qIdx = nbQubits - 1; qIdx >= 0; --qIdx) {
 
   43     auto shiftedBitIdx = qIdx + startIdx;
 
   45     for (
int j = qIdx - 1; j >= 0; --j) {
 
   46       const double theta = M_PI/std::pow(2.0, qIdx - j);
 
   47       auto targetIdx = j + startIdx;
 
   48       CPhase(q[shiftedBitIdx], q[targetIdx], theta);
 
   53   int swapCount = (shouldSwap == 0) ? 0 : 1;
 
   54   for (
int count = 0; count < swapCount; ++count) {
 
   55     for (
int qIdx = 0; qIdx < nbQubits/2; ++qIdx) {
 
   56       Swap(q[startIdx + qIdx], q[startIdx + nbQubits - qIdx - 1]);
 
   // Kernel `qft`, taking a whole register `q`.
   // NOTE(review): no body statements for this kernel are visible in the
   // recovered listing (its embedded line numbers jump from 61 to 65).
   // Presumably it forwards to qft_opt_swap with swapping enabled -- confirm
   // against the upstream source before relying on that.
   61 __qpu__ 
void qft(qreg q) {
 
   65 __qpu__ 
void iqft_opt_swap(qreg q, 
int shouldSwap) {
 
   67   int nbQubits = q.size();
 
   68   int swapCount = (shouldSwap == 0) ? 0 : 1;
 
   69   for (
int count = 0; count < swapCount; ++count) {
 
   71     for (
int qIdx = 0; qIdx < nbQubits/2; ++qIdx) {
 
   72       Swap(q[startIdx + qIdx], q[startIdx + nbQubits - qIdx - 1]);
 
   76   for (
int qIdx = 0; qIdx < nbQubits - 1; ++qIdx) {
 
   77     H(q[startIdx + qIdx]);
 
   79     for (
int y = qIdx; y >= 0; --y) {
 
   80       const double theta = -M_PI/std::pow(2.0, j - y);
 
   81       CPhase(q[startIdx + j], q[startIdx + y], theta);
 
   85   H(q[startIdx + nbQubits - 1]);
 
   88 __qpu__ 
void iqft_range_opt_swap(qreg q, 
int startIdx, 
int nbQubits, 
int shouldSwap) {
 
   89   int swapCount = (shouldSwap == 0) ? 0 : 1;
 
   90   for (
int count = 0; count < swapCount; ++count) {
 
   92     for (
int qIdx = 0; qIdx < nbQubits/2; ++qIdx) {
 
   93       Swap(q[startIdx + qIdx], q[startIdx + nbQubits - qIdx - 1]);
 
   97   for (
int qIdx = 0; qIdx < nbQubits - 1; ++qIdx) {
 
   98     H(q[startIdx + qIdx]);
 
  100     for (
int y = qIdx; y >= 0; --y) {
 
  101       const double theta = -M_PI/std::pow(2.0, j - y);
 
  102       CPhase(q[startIdx + j], q[startIdx + y], theta);
 
  106   H(q[startIdx + nbQubits - 1]);
 
  109 __qpu__ 
void iqft(qreg q) {