Logo Search packages:      
Sourcecode: ugene version File versions

RepeatFinderSArray.cpp

/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "RepeatFinderSArray.h"
#include <time.h>
#include <math.h>

namespace GB2 {

//const static quint32 NUCL_BITS_SIZE = 2;
//const static quint32 PROTEIN_BITS_SIZE = 5;


RepeatFinderSArrayW::RepeatFinderSArrayW(const char* seqX, quint32 sizeX, const char* seqY, quint32 sizeY, DNAAlphabet* al,  quint32 w, quint32 threads) 
:RepeatFinder(seqX, sizeX, seqY, sizeY, al, w, w),
nThreads(threads), arrayPercent(0), percentDone(0), nRunning(0), active(FALSE), index(NULL)
{
      quint32 maxSize = qMax(SIZE_X, SIZE_Y);
      quint32 minSize = qMin(SIZE_X, SIZE_Y);
      
      bool arrayIsMax = TRUE;
      quint32 gap = w-getWGap(w);
      quint32 maxWithGapSize = maxSize/(gap+1);
      quint32 minWithGapSize = minSize/(gap+1);
      if (maxSize > 1000*1000) {
            if (maxWithGapSize > minSize) {
                  arrayIsMax=FALSE;
            } else if (maxWithGapSize * 8 > 128*1000*1000) { //too many mem to use max -> using min will reduce mem usage
                  arrayIsMax=FALSE;
            } else {
                  double searchK = 1.3;
                  quint32 tMinIndexed = minWithGapSize + searchK*maxSize;
                  quint32 tMaxIndexed = maxWithGapSize + searchK*minSize;
                  if (tMinIndexed < tMaxIndexed) {
                        arrayIsMax=FALSE;
                  } else if /*practical*/ ((gap < 16 && maxSize > 50*1000*1000) ||
                              (gap < 20 && maxSize > 100*1000*1000) ||
                              (gap < 25 && maxSize > 200*1000*1000) ||
                              (gap < 32 && maxSize > 400*1000*1000) ||
                              (gap < 37 && maxSize > 500*1000*1000)) {
                        arrayIsMax = FALSE;
                  }
            }
      }
      arrayIsX = arrayIsMax && SIZE_X >=SIZE_Y;
      arraySeq = arrayIsX ? seqX : seqY;
      searchSeq = arrayIsX ? seqY : seqX;
      ARRAY_SIZE = arrayIsX ? SIZE_X: SIZE_Y;
      SEARCH_SIZE = arrayIsX ? SIZE_Y: SIZE_X;

      index = NULL;
      bitMaskCharBitsNum = (al->getType() == DNAAlphabet_RAW) ? 0 : SArrayIndex::getBitLen(al->getMap().count(true));
      //single thread approximation (reestimated in some algorithms)
      arrayPercent = reflective?(bitMaskCharBitsNum?70:50):(bitMaskCharBitsNum?30:50) * ((float)ARRAY_SIZE)/SEARCH_SIZE * 100;
}

RepeatFinderSArrayW::~RepeatFinderSArrayW() {
      active = FALSE;
      waitThreadsStop();
      if (index!=NULL) {
            delete index;
            index = NULL;
      }
      qDeleteAll(workers);
}


void RepeatFinderSArrayW::waitThreadsStop(){
      foreach (WorkerThread* t, workers) {
            t->wait();
      }
}

RepeatResult* RepeatFinderSArrayW::getResult(quint32 num) {
      tmpEdge = results.at(num);
      return &tmpEdge;
}

quint32 RepeatFinderSArrayW::getNumResults() {
      return results.size();
}

void RepeatFinderSArrayW::startCalculation() {
      active = TRUE;
      quint32 sSize = SEARCH_SIZE;
      if (reflective) {
            resultsMutex.lock();
            tmpEdge.x = 1;
            tmpEdge.y = 1;
            tmpEdge.l = sSize;
            results.append(tmpEdge);
            resultsMutex.unlock();
      }
      

      quint32 n = nRunning = qMin(nThreads, qMax<quint32>(1, sSize / (2*1000/**1000*/)));
      indexMutex.lock();
      quint32 start = 0;
      if (FALSE &&  reflective) {
      } else {
            quint32 len = sSize / n;
            for (quint32 i = 0; i < n; i++) {
                  WorkerThread* t = new WorkerThread(this, start?start-WINDOW_SIZE+1:0, i<n-1?start+len: sSize, i);
                  t->start(QThread::LowPriority);
                  workers.append(t);
                  start+=len;
            }
      }
      nRunning = workers.count();
      reportCounter = 0;
      indexMutex.unlock();
}

void RepeatFinderSArrayW::calculate(RepeatFinderSArrayW::WorkerThread* t) {
      const quint32 W = WINDOW_SIZE;
//    const quint32 W1 = W-1;
//    quint32 aSize = ARRAY_SIZE;
      quint32 sSize = t->sEnd - t->sStart;
//    const char* dataA = arraySeq;
      const char* dataS = searchSeq + t->sStart;
//    const char* dataAEnd = dataA + aSize;
      const char* dataSEnd = dataS + sSize;
      const char* dataSEndW = dataSEnd-W+1;
      bool reflect = reflective;//local
      quint32 reportLen = sSize / (100-arrayPercent);
      const char* reportPos = dataS + reportLen;

      CheckingEdge* pool = NULL;
      CheckingEdge* chain = new CheckingEdge(0, 0, 0);
      chain->next = chain->prev = chain;
//    long t0 = clock();
//    qDebug("preparing index!\n");
      indexMutex.lock();
      if (index == NULL) {
            if (bitMaskCharBitsNum) { 
                  index = new SArrayIndex(arraySeq, ARRAY_SIZE, W, active, unknownChar, al->getMap());      
            } else {
                  index = new SArrayIndex(arraySeq, ARRAY_SIZE, W, active, unknownChar);  
            }
            if (resultsListener!=NULL) {
                  resultsListener->percentDone(arrayPercent);
            }
            percentDone=arrayPercent;
      }
      indexMutex.unlock();
      SArrayIndex* _index = index;
      if (!active) {
            return;
      }
//    long t1 = clock();
//    qDebug("searching (index time: %d)!\n", (t1-t0));
      quint32 nNew = 0;
      quint32 nMatches = 0;
      quint32 a;
      quint32 bitValue = 0xFFFFFFFF;
      quint32 charBitsNum = bitMaskCharBitsNum;
      quint32 wCharsInMask = index->wCharsInMask;
      quint32 wCharsInMask1 = wCharsInMask-1;
      //const quint32* bm = bitMask;
      const QBitArray& bm = al->getMap(); //FIXME check performance
      quint32 bitFilter = index->bitFilter;;
      for (const char* runner = dataS; runner < dataSEndW && active; runner++) {
            if (runner==reportPos) {
                  report();
                  reportPos+=reportLen;
            }
            if (chain!=chain->next && chain->next->lastS < runner) {
                  //validating candidates to final edges
                  for (CheckingEdge* item = chain->next; item->lastS < runner && item!=chain;) {
                        CheckingEdge* next = item->next;
                        item->fromChain();
                        quint32 len = item->lastS - item->s;
                        quint32 s = item->s - dataS;
                        quint32 a = item->diag + s;
                        addToResults(a, s, len, t);
                        item->next = pool;
                        item->prev = NULL;
                        pool = item;
                        item = next;
                  }
            }
            if (bitMaskCharBitsNum ) {
                  char c = *(runner+wCharsInMask1);
                  if (c != unknownChar && runner!= dataS) {
                        bitValue = ((bitValue<<charBitsNum) | bm[c]) & bitFilter;
                  } else {
                        bitValue = 0;
                        runner!=dataS && (runner+=wCharsInMask);
                        for (quint32 cleanChars = 0; cleanChars < wCharsInMask && runner < dataSEnd; runner++) {
                              if (*runner==unknownChar) {
                                    cleanChars = 0;
                                    bitValue = 0;
                              } else {
                                    bitValue = (bitValue<<charBitsNum) | bm[*runner];
                                    cleanChars++;
                              }
                        }
                        runner-=wCharsInMask;
                        if(runner >= dataSEndW) {
                              break;
                        }
                  }
                  if (!_index->findBit(t, bitValue, runner)) {
                        continue;
                  }
            } else {
                  if (!_index->find(t, runner)) {
                        continue;
                  }
            }

            quint32 s = runner - dataS;
            quint32 globalS = s + t->sStart;
            while ((a = _index->nextArrSeqPos(t))!=0xFFFFFFFF) {
                  nMatches++;
                  if (reflect && globalS >= a) {
                        continue;
                  }
                  //checking merge
                  bool newEdge = TRUE;
                  qint32 diag = a-s;
                  CheckingEdge* item;
                  for (item = chain->next; item!=chain;item=item->next) {
                        if (item->diag == diag) {
                              if (item->lastS < runner) { // gap while passing unknownChar
                                    break;
                              }
                              newEdge = FALSE;
                              item->lastS= runner + W;
                              item->fromChain();
                              item->toChain(chain);//making item last
                              break;
                        }
                  } 
                  if (newEdge) {
                        if (pool == NULL) {
                              item = new CheckingEdge(runner, diag, runner+W);
                              nNew++;
                        } else {
                              item = pool;
                              pool = pool->next;
                        }
                        item->s = runner;
                        item->diag = diag;
                        item->lastS = runner+W;
                        item->toChain(chain);
                  }
            }
      }
      CheckingEdge* item;
      for (item = chain->next; item!=chain;) {
            quint32 len = item->lastS - item->s;
            quint32 s = item->s - dataS;
            quint32 a = item->diag + s;
            addToResults(a, s, len, t);
            CheckingEdge* prev = item;
            item=item->next;
            delete prev;
      }
      delete chain;
      for (item = pool; item!=NULL;) {
            CheckingEdge* prev = item;
            item=item->next;
            delete prev;
      }


//    long t2 = clock();
//    qDebug("Done, nResults %d, nNew %d, nMatches %d\n", results.size(), nNew, nMatches);
//    qDebug("Done. Preparing tree time %d, searching time %d\n!!!", (t1-t0), (t2-t1));
}

void RepeatFinderSArrayW::calculateProgressive(RepeatFinderSArrayW::WorkerThread* t) {
      //    printf("Thread started: %d, from %d, to %d\n", t->tid, t->sStart, t->sEnd);
      const quint32 W = WINDOW_SIZE;
//    const quint32 W1 = W-1;
      const quint32 W_GAP = getWGap(W);
      const quint32 GAP = W-W_GAP; 
      //qDebug("W: %d, GAP %d\n", W, GAP);
      quint32 aSize = ARRAY_SIZE;
      quint32 sSize = t->sEnd - t->sStart;
      const char* dataA = arraySeq;
      const char* dataS = searchSeq + t->sStart;
      const char* dataAEnd = dataA + aSize;
      const char* dataSEnd = dataS + sSize;
      const char* dataSCheckEnd= dataSEnd-W_GAP+1;
      bool reflect = reflective;//local
      arrayPercent = 50/(GAP+1);//todo
      quint32 reportLen = sSize / (100-arrayPercent);
      const char* reportPos = dataS + reportLen;

      CheckingEdge* pool = NULL;
      CheckingEdge** chains = (CheckingEdge**)(new quint32[GAP+1]);
      quint32 chainIdx=0;
      for (;chainIdx < GAP+1;chainIdx++) {
            chains[chainIdx]=new CheckingEdge(0, 0, 0);
            chains[chainIdx]->next = chains[chainIdx]->prev = chains[chainIdx];
      }
        long t0 = clock();
        Q_UNUSED(t0);
      indexMutex.lock();
      if (index == NULL) {
            if (bitMaskCharBitsNum) { 
                  index = new SArrayIndex(arraySeq, ARRAY_SIZE, W_GAP, active, unknownChar, al->getMap(), GAP);   
            } else {
                  index = new SArrayIndex(arraySeq, ARRAY_SIZE, W_GAP, active, unknownChar, QBitArray(), GAP);    
            }
            if (resultsListener!=NULL) {
                  resultsListener->percentDone(arrayPercent);
            }
            percentDone=arrayPercent;
            //printf("index created by thread %d\n", t->tid);
      }
      indexMutex.unlock();
      SArrayIndex* _index = index;
      if (!active) {
            return;
      }
      long t1 = clock();
        Q_UNUSED(t1);
        //printf("searching (index time: %d)!\n", (t1-t0));
      quint32 nNew = 0;
      //quint32 nMatches = 0;
      quint32 a;
      quint32 bitValue = 0xFFFFFFFF;
      quint32 charBitsNum = bitMaskCharBitsNum;
      quint32 wCharsInMask = index->wCharsInMask;
      //const quint32* bm = bitMask;
      const QBitArray& bm = al->getMap();
      quint32 bitFilter = index->bitFilter;;
      chainIdx=0;
      for (const char* runner = dataS; runner < dataSCheckEnd && active; runner++, chainIdx=chainIdx==GAP?0:chainIdx+1) {
            if (runner >= reportPos) {
                  report();
                  reportPos+=reportLen;
            }
            CheckingEdge* chain = chains[chainIdx];
            if (chain!=chain->next && chain->next->lastS < runner) {
                  //validating candidates to final edges
                  for (CheckingEdge* item = chain->next; item->lastS < runner && item!=chain;) {
                        CheckingEdge* next = item->next;
                        item->fromChain();
                        // dorazvit vpered:
                        const char* lastS = item->lastS;
                        const char* lastA = dataA + (lastS-dataS) + item->diag;
                        if (unknownChar) {
                    for (;lastS < dataSEnd && lastA < dataAEnd && *lastS!=unknownChar && *lastS == *lastA; lastS++, lastA++){};
                        } else {
                    for (;lastS < dataSEnd && lastA < dataAEnd && *lastS == *lastA; lastS++, lastA++){};
                        }
                        quint32 len = lastS - item->s;
                        if (len>=W) {
                              quint32 s = item->s - dataS;
                              quint32 a = item->diag + s;
                              addToResults(a, s, len, t);
                        }
                        item->next = pool;
                        item->prev = NULL;
                        pool = item;
                        item = next;
                  }
            }
            if (bitMaskCharBitsNum ) {
                  char c = *(runner+wCharsInMask-1);
                  if (c != unknownChar && runner!= dataS) {
                        bitValue = ((bitValue<<charBitsNum) | bm[c]) & bitFilter;
                  } else {
                        bitValue = 0;
                        runner!=dataS && (runner+=wCharsInMask);
                        for (quint32 cleanChars = 0; cleanChars < wCharsInMask && runner < dataSEnd; runner++) {
                              if (*runner==unknownChar) {
                                    cleanChars = 0;
                                    bitValue = 0;
                              } else {
                                    bitValue = (bitValue<<charBitsNum) | bm[*runner];
                                    cleanChars++;
                              }
                        }
                        runner-=wCharsInMask;
                        if(runner >= dataSCheckEnd) {
                              break;
                        }
                        chainIdx=(runner-dataS)%(GAP+1);
                        chain = chains[chainIdx];
                  }
                  if (!_index->findBit(t, bitValue, runner)) {
                        continue;
                  }
            } else {
                  if (!_index->find(t, runner)) {
                        continue;
                  }
            }
            quint32 s = runner - dataS;
            while ((a = _index->nextArrSeqPos(t))!=0xFFFFFFFF) {
                  //nMatches++;
                  if (reflect && s + t->sStart >= a) {
                        continue;
                  }
                  //checking merge
                  qint32 diag = a-s;
                  bool merged = FALSE;
                  CheckingEdge* item;
                  for (item = chain->next; item!=chain; item=item->next) {
                        if (item->diag != diag) {
                              continue;
                        }
                        if (item->lastS < runner) { // gap while passing unknownChar
                              break;
                        }
                        merged = TRUE;
                        item->lastS = runner + W_GAP;
                        item->fromChain();
                        item->toChain(chain);//making item last
                        break;
                  } 
                  if (!merged) {
                        //dorazvit nazad
                        const char* itemS = runner - 1;
                        const char* itemA = dataA + a - 1;
                        if (unknownChar) {
                    for (;itemS >= dataS && itemA >= dataA && *itemS!=unknownChar && *itemS == *itemA; itemS--, itemA--){};
                        } else {
                    for (;itemS >= dataS && itemA >= dataA && *itemS == *itemA; itemS--, itemA--){};
                        }
                        itemS++;
                        if (pool == NULL) {
                              item = new CheckingEdge(itemS, diag, runner + W_GAP);
                              nNew++;
                        } else {
                              item = pool;
                              pool = pool->next;
                              item->s = itemS;
                              item->diag = diag;
                              item->lastS = runner + W_GAP;
                        }
                        item->toChain(chain);
                  }
            }
      }
      CheckingEdge* item;
      for (chainIdx=0; chainIdx<GAP+1; chainIdx++) {
            CheckingEdge* chain = chains[chainIdx];
            for (item = chain->next; item!=chain;) {
                  //dorazvit vpered
                  const char* lastS = item->lastS;
                  const char* lastA = dataA + (lastS-dataS) + item->diag;
                  if (unknownChar) {
                for (;lastS < dataSEnd && lastA < dataAEnd && *lastS!=unknownChar && *lastS == *lastA; lastS++, lastA++){};
                  } else {
                for (;lastS < dataSEnd && lastA < dataAEnd && *lastS == *lastA; lastS++, lastA++){};
                  }
                  item->lastS = lastS;

                  quint32 len = item->lastS - item->s;
                  quint32 s = item->s - dataS;
                  quint32 a = item->diag + s;
                  if (len >=W) {
                        addToResults(a, s, len, t);
                  }
                  CheckingEdge* prev = item;
                  item=item->next;
                  delete prev;
            }
            delete chains[chainIdx];
      }
      delete chains;
      for (item = pool; item!=NULL;) {
            CheckingEdge* prev = item;
            item=item->next;
            delete prev;
      }

//    long t2 = clock();
      //printf("Done, nResults %d, nNew %d, nMatches %d\n", results.size(), nNew, /*nMatches*/-1);
      //printf("Done. Thread %d, Preparing tree time: %d, searching time: %d, sum: %d\n!!!", t->tid, (t1-t0), (t2-t1), (t2-t0));
}


void RepeatFinderSArrayW::calculateDoubleIndex(RepeatFinderSArrayW::WorkerThread* t) {
      if (t->tid!=0) {
            return;
      }
      const quint32 W = WINDOW_SIZE;
//    const quint32 W1 = W-1;
      const quint32 W_GAP = getWGap(W);
      const quint32 GAP = W-W_GAP; 
      qDebug("W: %d, GAP %d\n", W, GAP);
      quint32 aSize = ARRAY_SIZE;
      quint32 sSize = t->sEnd - t->sStart;
      const char* dataA = arraySeq;
      const char* dataS = searchSeq + t->sStart;
      const char* dataAEnd = dataA + aSize;
      const char* dataSEnd = dataS + sSize;
//    const char* dataSEndW = dataSEnd-W+1;
      bool reflect = reflective;//local

//    CheckingEdge* pool = NULL;
      CheckingEdge* chain = new CheckingEdge(0, 0, 0);
      chain->next = chain->prev = chain;
      indexMutex.lock();
      SArrayIndex* indexA = NULL;
      SArrayIndex* indexS = NULL;
      bool& _active = active;
//    quint32 percentDone = 0;
      long t0 = clock();
      float searchIndexWeight = SEARCH_SIZE / (float)ARRAY_SIZE;
      if (index == NULL) {
            indexS = new SArrayIndex(searchSeq, ARRAY_SIZE, W_GAP, active, unknownChar, al->getMap(), GAP); 
            if (resultsListener!=NULL) {
                  resultsListener->percentDone( 100 * searchIndexWeight /( searchIndexWeight + GAP+1));
            }
      }
      indexMutex.unlock();
      SArrayIndex* _index = index;
        Q_UNUSED(_index);
        if (!_active) {
            delete indexS;
            return;
      }
      long t1 = clock();
        qDebug("searching (first index time: %ld)!\n", (t1-t0));
      //unknownChar=0;
      for (quint32 offs = 0; offs <= GAP && _active; offs++) {
            indexA = new SArrayIndex(arraySeq, ARRAY_SIZE, W_GAP, active, unknownChar, al->getMap(), GAP, offs);
            if (resultsListener!=NULL) {
                        resultsListener->percentDone( (int) (100 * (searchIndexWeight+offs+1) /(searchIndexWeight+GAP+1)));
            }
            long tt0 = clock();

            quint32* maskA = indexA->bitMask;
            quint32* maskS = indexS->bitMask;
            quint32* maskAEnd = maskA + indexA->size ;
            quint32* maskSEnd = maskS + indexS->size;
            quint32* maskAEnd1 = maskAEnd - 1;
            quint32* maskSEnd1 = maskSEnd - 1;
            quint32* arunner = maskA;
            quint32* srunner = maskS;
            quint32* sArray = indexS->sArray;
            quint32 wCharsInMask = indexA->wCharsInMask;
            do {
                  while (*arunner != *srunner) {
                        if (*arunner < *srunner) {
                              if (arunner < maskAEnd1) {
                                    arunner++;
                                    continue;
                              } else {
                                    goto out;
                              }
                        } else {
                              if (srunner < maskSEnd1) {
                                    srunner++;
                                    continue;
                              } else {
                                    goto out;
                              }
                        }
                  }
                  while (*arunner==*srunner) {
                        if (arunner == maskAEnd || srunner == maskSEnd) {
                              goto out;
                        }
                        quint32 bitMask = *arunner;
                        char* seq = (char*)sArray[srunner-maskS];
                        char* seqAfter = seq+wCharsInMask;
//                      char* sseqA = (char*)indexA->sArray[arunner-maskA];
                        qint32 rc = indexA->compareAfterBits(arunner-maskA, seqAfter);
                        if (rc!=0) {
                    (rc < 0 && (arunner++)) || (srunner++);
                        } else {//match
                              quint32 nA=1 , nS=1;
                    for (arunner++; arunner < maskAEnd && *arunner==bitMask && !indexA->compareAfterBits(arunner-maskA, seqAfter); arunner++, nA++){};
                    for (srunner++; srunner < maskSEnd && *srunner==bitMask && !indexS->compareAfterBits(srunner-maskS, seqAfter); srunner++, nS++){};
                              quint32 posA = arunner-maskA-nA;
                              quint32 posS = srunner-maskS-nS;
                              //if (reflect && nA == 1 && posA==posS) {//main diag only
                              //    continue;
                              //}
                              
                              for (quint32 ai = 0; ai < nA; ai++, posA++) {
                                    char* aSeq1 = (char*)indexA->sArray[posA];
                                    char* aEnd1 = aSeq1+W_GAP;
                                    qint32 aa = aSeq1-dataA;
                                    quint32 _posS = posS;
                                    for (quint32 si = 0; si < nS; si++, _posS++) {
                                          char* sSeq1 = (char*)indexS->sArray[_posS];     
                                          if (reflect && (sSeq1-dataS >= aa)) {
                                                continue;
                                          }
                                          char* aSeq = aSeq1-1;
                                          char* sSeq = sSeq1-1;
                                          quint32 gapi = 0;
                            for(; gapi <= GAP && aSeq >= dataA && sSeq >= dataS && *aSeq==*sSeq; gapi++, aSeq--, sSeq--){};
                                          if (gapi > GAP) {
                                                continue;// there will more matches on the same diag
                                          }
                                          aSeq++;
                                          sSeq++;
                                          char* aEnd = aEnd1;
                                          char* sEnd = sSeq1+W_GAP;
                            for (; aEnd < dataAEnd && sEnd < dataSEnd && *aEnd==*sEnd; aEnd++, sEnd++){};
                                          quint32 a = aSeq-dataA;
                                          quint32 s = sSeq-dataS;
                                          quint32 len = aEnd-aSeq;
                                          if (len>=W) {
                                                addToResults2(a+1, s+1, len);
                                          }
                                    }
                              }
                        }
                  }
            } while (_active);
out:
            delete indexA;
            long tt1 = clock();
        qDebug("searching (A index time: %ld)!\n", (tt1-tt0));
      }
      delete indexS;
      long t2 = clock();
    qDebug("searching (second index time: %ld)!\n", (t2-t1));

}


void RepeatFinderSArrayW::calculateReflective(RepeatFinderSArrayW::WorkerThread* t) {
      if (t->sStart != 0) {//singlethreaded
            return;
      }
      long t0 = clock();
      if (bitMaskCharBitsNum) { 
            index = new SArrayIndex(arraySeq, ARRAY_SIZE, WINDOW_SIZE, active, unknownChar, al->getMap());
      } else {
            index = new SArrayIndex(arraySeq, ARRAY_SIZE, WINDOW_SIZE, active, unknownChar);
      }
      long t1 = clock();
        qDebug("searching (index time: %ld)!\n", (t1-t0));
      if (resultsListener!=NULL) {
            resultsListener->percentDone(arrayPercent);
      }
      if (!active) {
            return;
      }
      percentDone=arrayPercent;
      const quint32 W = WINDOW_SIZE;
      const char* data = arraySeq;
      const char* dataEnd = arraySeq + ARRAY_SIZE;
      quint32* sArray = index->sArray;
      quint32* maxPos = sArray + index->size - 1;
      quint32 reportTimes = 100-arrayPercent;
      quint32 reportLen= index->size / reportTimes;
      quint32* reportPos = sArray + reportLen; 
      for(quint32* pos = sArray; pos < maxPos && active; pos++) {
            quint32 nMatches = 0;
            if (bitMaskCharBitsNum) {
            for (; pos < maxPos && !index->compareBitByPos(pos, pos+1); nMatches++, pos++){};
            } else {
            for (; pos < maxPos && !index->compare(*pos, *(pos+1)); nMatches++, pos++){};
            }
            if (pos >= reportPos) {
                  report();
                  reportPos+=reportLen;
            }
            if (nMatches == 0) {
                  continue;
            }
            quint32* startMatches = pos - nMatches;
            quint32* endMatches = pos;
            for (quint32* i = startMatches; i <= endMatches; i++) {
                  for (quint32* j = i+1; j <= endMatches; j++) {
                        char* a0 = (char*)*i; 
                        char* s0 = (char*)*j;
                        if ( a0 < s0 ) { // to avoid double compares
                              char* tmp = a0;
                              a0=s0;
                              s0=tmp;
                        }
                        if (s0>data && *(a0-1)==*(s0-1) && (unknownChar==0 || *(s0-1)!=unknownChar)) {
                              continue;//not first match (will be merged)
                        }
                        char* a1 = a0+W;
                        char* s1 = s0+W;
                        if (unknownChar) {
                    for (;a1 < dataEnd && *a1==*s1 && *a1!=unknownChar ; a1++, s1++){};
                        } else {
                    for (;a1 < dataEnd && *a1==*s1 ; a1++, s1++){};
                        }
                        addToResults2(a0-data+1, s0-data+1, a1-a0);
                  }
            }
      }
      long t2 = clock();
        qDebug("Done, nResults %d\n", results.size());
        qDebug("Done. Preparing tree time %ld, searching time %ld, sum %ld\n!!!", (t1-t0), (t2-t1), (t2-t0));
}



void RepeatFinderSArrayW::report() {
      if (resultsListener!=NULL) {
            indexMutex.lock();
            reportCounter++;
            if (reportCounter == nThreads) {
                  reportCounter=0;
                  percentDone++;
                  //qDebug("percent done %d\n", percentDone);
            //    resultsListener->percentDone(percentDone);
            }
            indexMutex.unlock();
      }
}


void RepeatFinderSArrayW::onFinished(RepeatFinderSArrayW::WorkerThread* t) {
    Q_UNUSED(t);
      indexMutex.lock();
      nRunning--;
      //printf("Finished! left:%d\n", nRunning);
      if (nRunning == 0 ) {
            //join boundary
            for (int j=0; j < bresultsA.size(); j++) {
                  quint32 d = bresultsD[j];
                  quint32 a = bresultsA[j];
                  quint32 l = bresultsL[j];
                  bool merged = FALSE;
                  for (int i=j+1; i < bresultsA.size(); i++) {
                        if (d != bresultsD[i]) {
                              continue;
                        }
                        quint32 a1 = bresultsA[i];
                        quint32 l1 = bresultsL[i];
                        if ( a > a1 && a <= a1+l1) {
                              bresultsL[i]=(a+l)-a1;  
                              merged = TRUE;
                              break;
                        } else if (a1 > a && a1 <= a+l) {
                              quint32 dl=(a1-a);
                              bresultsA[i]=a;
                              bresultsS[i]-=dl;
                              bresultsL[i]+=dl;
                              merged = TRUE;
                              break;
                        }
                  }
                  if (!merged) {
                        addToResults2(a, bresultsS[j], l);
                  }
            }
//          DBG(printf(">>>>nResults:%d\n",  results.size()));
            if (active && resultsListener!=NULL) {
                  resultsListener->percentDone(100);
                  resultsListener->calculationFinished();
            }

            if (index!=NULL) {
                  delete index;
                  index = NULL;
            }
            active = FALSE;
      }
      indexMutex.unlock();
}

void RepeatFinderSArrayW::addToResults(quint32 a, quint32 s, quint32 l, RepeatFinderSArrayW::WorkerThread* t) {
      bool boundary = nThreads > 1 && (s == 0 || s + l == t->sEnd - t->sStart); 
      a++;
      s+=t->sStart+1;
      if (boundary) {
            boundaryMutex.lock();
            bresultsA.append(a);
            bresultsS.append(s);
            bresultsL.append(l);
            bresultsD.append(a-s);
            boundaryMutex.unlock();
      } else {
            addToResults2(a, s, l);
      }
}

void  RepeatFinderSArrayW::addToResults2(quint32 a, quint32 s, quint32 l) {
      //if (1) {
      //    printf("%d, %d, %d\n", a, s, l);
      //    return;
      //}
//    if (a < 0 || s < 0) {
//          printf("err");
//    }
      RepeatResult e;
      e.x = arrayIsX ? a : s;
      e.y = arrayIsX ? s : a;
      e.l = l;

      resultsMutex.lock();
      results.append(e);
      if (reflective) {
            quint32 x = e.x;
            e.x = e.y;
            e.y = x;
            results.append(e);
      }
      resultsMutex.unlock();
}

void RepeatFinderSArrayW::terminateCalculation() {
      active = FALSE;
      waitThreadsStop();
}

quint32 RepeatFinderSArrayW::getWGap(quint32 W) {
      return W<8? W : W<10? W-1: W<12? W-2: W<16? W-3: W<20? W-4: W<30? 16 : W/2+1;
}
//////////////////////////////////////////////////////////////////////////
//Worker
//////////////////////////////////////////////////////////////////////////
RepeatFinderSArrayW::WorkerThread::WorkerThread(RepeatFinderSArrayW* _owner, quint32 _sStart, quint32 _sEnd, quint32 _tid) : owner(_owner), sStart(_sStart), sEnd(_sEnd), tid(_tid) 
{
}

void RepeatFinderSArrayW::WorkerThread::run() {
      if (owner->reflective) {
//          owner->calculateReflective(this);
            owner->calculateProgressive(this);
//          owner->calculateDoubleIndex(this);
//          owner->calculate(this);
      } else {
//          owner->calculate(this);
            owner->calculateProgressive(this);
//          owner->calculateDoubleIndex(this);
      }
      owner->onFinished(this);
}

}//namespace

Generated by  Doxygen 1.6.0   Back to index