Logo Search packages:      
Sourcecode: ugene version File versions

PDBFormat.cpp

/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include <QtCore/QStringList>

#include <core_api/Task.h>
#include <core_api/IOAdapter.h>
#include <core_api/AppContext.h>
#include <core_api/Log.h>

#include <gobjects/GObjectTypes.h>
#include <gobjects/BioStruct3DObject.h>
#include <gobjects/AnnotationTableObject.h>
#include <gobjects/DNASequenceObject.h>
#include <gobjects/GObjectRelationRoles.h>
#include <gobjects/GObjectUtils.h>
#include <util_text/TextUtils.h>
#include <time.h>
#include <memory>


#include "DocumentFormatUtils.h"
#include "PDBFormat.h"



namespace GB2 { 


static LogCategory log(ULOG_CAT_IO);
static LogCategory logPerf(ULOG_CAT_PERFORMANCE);

QHash<QByteArray,int> PDBFormat::atomNumMap;
QHash<QByteArray, QChar> PDBFormat::acronymNameMap;
double PDBFormat::atomRadiusTable[NUM_ELEMENTS] = {0.0};


PDBFormat::PDBFormat( QObject* p ) : DocumentFormat(p)
{
      formatName = tr("PDB");
      initUtilityMaps();
}

QStringList PDBFormat::getSupportedDocumentFileExtensions()
{
      QStringList l;
      l << "pdb";
      return l;
}

bool PDBFormat::isDataFormatSupported( const char* data, int size ) const
{
      static const char* formatTag = "HEADER";
      int firstLineSize = 66;

      QByteArray rawData(data, size);
      if (!rawData.startsWith(formatTag) || size < firstLineSize) {
            return false;
      }
      
      return !TextUtils::contains(TextUtils::BINARY, data, size);
}


void PDBFormat::storeDocument( Document* d, TaskStateInfo& ti, IOAdapterFactory* io /*= NULL*/, const QString& newDocURL /*= QString::null*/ )
{
      Q_UNUSED(d); Q_UNUSED(ti); Q_UNUSED(io); Q_UNUSED(newDocURL);
      Q_ASSERT(0);
    ti.error = tr("writing_not_supported");
}

bool PDBFormat::isObjectOpSupported( const Document* d , DocumentFormat::DocObjectOp op, GObjectType t ) const
{
      Q_UNUSED(d); Q_UNUSED(op);
      return (t == GObjectTypes::BIOSTRUCTURE_3D); 
}

bool PDBFormat::checkConstraints( const DocumentFormatConstraints& c ) const
{
      foreach (GObjectType t, c.supportedObjectTypes) {
            if (t!=GObjectTypes::BIOSTRUCTURE_3D) {
                  return false;
            }
      }
      
      if (c.mustSupportWrite) {
            return false;
      }

      if (c.checkRawData) {
            return isDataFormatSupported(c.rawData.constData(), c.rawData.size());
      }

      return true;
}

Document* PDBFormat::loadExistingDocument( IOAdapterFactory* iof, const QString& url, TaskStateInfo& ti, const QVariantMap& fs )
{
      std::auto_ptr<IOAdapter> io(iof->createIOAdapter());
      
      bool ok = io->open(url, IOAdapterMode_Read);
      if (!ok) {
            ti.error = IOAdapter::tr("error_opening_url_for_read '%1'").arg(url);
            return NULL;
      }
      
    log.trace("parsing: " +url);

      BioStruct3D bioStruct;
      PDBParser pdbParser(io.get());
      clock_t t1 =  clock();
    pdbParser.parseBioStruct3D(bioStruct, ti);
      clock_t t2 =  clock();
    logPerf.trace("PDB file parsing time time: " + QString::number((float)( t2 - t1)/ CLOCKS_PER_SEC ));
    
    if (ti.hasErrors() || ti.cancelFlag) {
            return NULL;
      }
      io->close();

    ti.progress = 30;
      bioStruct.calcCenterAndMaxDistance();
    ti.progress = 50;
    bioStruct.generateAnnotations();
    ti.progress = 60;
    calculateBonds(bioStruct);
    ti.progress = 90;

    QList<GObject*> objects;
    QMap< AnnotationTableObject*, DNASequenceObject*> relationsMap;
    
    foreach(int key, bioStruct.moleculeMap.keys()) {
        QByteArray sequence = bioStruct.getRawSequenceByChainId(key);
          QByteArray sequenceName(bioStruct.pdbId + QString(" chain %1 sequence").arg(key).toAscii() );     
        DNASequenceObject* dnaObj = createSequenceObject(sequence,sequenceName);
        AnnotationTableObject* aObj = new AnnotationTableObject(bioStruct.pdbId + QString(" chain %1 annotation").arg(key).toAscii() );
        foreach (SharedAnnotationData sd, bioStruct.moleculeMap.value(key)->annotations) {
            aObj->addAnnotation(new Annotation(sd), ""); 
        }
        objects.append(dnaObj);
        objects.append(aObj);
        relationsMap.insert(aObj, dnaObj);
        
    }    
    
    //AnnotationTableObject* ao = new AnnotationTableObject(bioStruct.pdbId + " annotation");
    //fillBioStruct3DAnnotationTable(ao, bioStruct);
    //objects.append(ao);
      
      //DocumentFormatUtils::addSequenceObject(objects, sequenceName, sequence); 
    BioStruct3DObject* bo = new BioStruct3DObject(bioStruct, bioStruct.pdbId);

      
    objects.append(bo);
      Document* doc = new Document(this, iof, url, objects, fs);

    QMap< AnnotationTableObject*, DNASequenceObject*>::const_iterator i;
    for (i = relationsMap.constBegin(); i != relationsMap.constEnd(); ++i) {
        DNASequenceObject* dnao = i.value();
        AnnotationTableObject* ao = i.key();
        ao->addObjectRelation(dnao, GObjectRelationRole::SEQUENCE);
        bo->addObjectRelation(dnao, GObjectRelationRole::SEQUENCE);
    }

    log.trace("parsing finished: " + url);
    ti.progress = 100;
      return doc;
}


void PDBFormat::initUtilityMaps()
{
      static bool initialized = false;

      if (!initialized) {
            
        // Init atomRadiusTable
        atomRadiusTable[1] = 0.23;
        atomRadiusTable[5] = 0.83;
        atomRadiusTable[6] = 0.68;
        atomRadiusTable[7] = 0.68;
        atomRadiusTable[8] = 0.68;
        atomRadiusTable[9] = 0.64;
        atomRadiusTable[14] = 1.20;
        atomRadiusTable[15] = 1.05;
        atomRadiusTable[16] = 1.02;
        atomRadiusTable[17] = 0.99;
        atomRadiusTable[33] = 1.21;
        atomRadiusTable[34] = 1.22;
        atomRadiusTable[35] = 1.21;
        atomRadiusTable[52] = 1.47;
        atomRadiusTable[53] = 1.40;


        // Init atomNumMap
            atomNumMap.insert("H",1);
            atomNumMap.insert("HE",2);
            atomNumMap.insert("LI",3);
            atomNumMap.insert("BE",4);
            atomNumMap.insert("B",5);
            atomNumMap.insert("C",6);
            atomNumMap.insert("N",7);
            atomNumMap.insert("O",8);
            atomNumMap.insert("F",9);
            atomNumMap.insert("NE",10);
            atomNumMap.insert("NA",11);
            atomNumMap.insert("MG",12);
            atomNumMap.insert("AL",13);
            atomNumMap.insert("SI",14);
            atomNumMap.insert("P",15);
            atomNumMap.insert("S",16);
            atomNumMap.insert("CL",17);
            atomNumMap.insert("AR",18);
            atomNumMap.insert("K",19);
            atomNumMap.insert("CA",20);
            atomNumMap.insert("SC",21);
            atomNumMap.insert("TI",22);
            atomNumMap.insert("V",23);
            atomNumMap.insert("CR",24);
            atomNumMap.insert("MN",25);
            atomNumMap.insert("FE",26);
            atomNumMap.insert("CO",27);
            atomNumMap.insert("NI",28);
            atomNumMap.insert("CU",29);
            atomNumMap.insert("ZN",30);
            atomNumMap.insert("GA",31);
            atomNumMap.insert("GE",32);
            atomNumMap.insert("AS",33);
            atomNumMap.insert("SE",34);
            atomNumMap.insert("BR",35);
            atomNumMap.insert("KR",36);
            atomNumMap.insert("RB",37);
            atomNumMap.insert("SR",38);
            atomNumMap.insert("Y",39);
            atomNumMap.insert("ZR",40);
            atomNumMap.insert("NB",41);
            atomNumMap.insert("MO",42);
            atomNumMap.insert("TC",43);
            atomNumMap.insert("RU",44);
            atomNumMap.insert("RH",45);
            atomNumMap.insert("PD",46);
            atomNumMap.insert("AG",47);
            atomNumMap.insert("CD",48);
            atomNumMap.insert("IN",49);
            atomNumMap.insert("SN",50);
            atomNumMap.insert("SB",51);
            atomNumMap.insert("TE",52);
            atomNumMap.insert("I",53);
            atomNumMap.insert("XE",54);
            atomNumMap.insert("CS",55);
            atomNumMap.insert("BA",56);
            atomNumMap.insert("LA",57);
            atomNumMap.insert("CE",58);
            atomNumMap.insert("PR",59);
            atomNumMap.insert("ND",60);
            atomNumMap.insert("PM",61);
            atomNumMap.insert("SM",62);
            atomNumMap.insert("EU",63);
            atomNumMap.insert("GD",64);
            atomNumMap.insert("TB",65);
            atomNumMap.insert("DY",66);
            atomNumMap.insert("HO",67);
            atomNumMap.insert("ER",68);
            atomNumMap.insert("TM",69);
            atomNumMap.insert("YB",70);
            atomNumMap.insert("LU",71);
            atomNumMap.insert("HF",72);
            atomNumMap.insert("TA",73);
            atomNumMap.insert("W",74);
            atomNumMap.insert("RE",75);
            atomNumMap.insert("OS",76);
            atomNumMap.insert("IR",77);
            atomNumMap.insert("PT",78);
            atomNumMap.insert("AU",79);
            atomNumMap.insert("HG",80);
            atomNumMap.insert("TL",81);
            atomNumMap.insert("PB",82);
            atomNumMap.insert("BI",83);
            atomNumMap.insert("PO",84);
            atomNumMap.insert("AT",85);
            atomNumMap.insert("RN",86);
            atomNumMap.insert("FR",87);
            atomNumMap.insert("RA",88);
            atomNumMap.insert("AC",89);
            atomNumMap.insert("TH",90);
            atomNumMap.insert("PA",91);
            atomNumMap.insert("U",92);
            atomNumMap.insert("NP",93);
            atomNumMap.insert("PU",94);
            atomNumMap.insert("AM",95);
            atomNumMap.insert("CM",96);
            atomNumMap.insert("BK",97);
            atomNumMap.insert("CF",98);
            atomNumMap.insert("ES",99);
            atomNumMap.insert("FM",100);
            atomNumMap.insert("MD",101);
            atomNumMap.insert("NO",102);
            atomNumMap.insert("LR",103);
                        
            // init acronymNameMap
            
        // amino acid residues
            acronymNameMap.insert("ALA", 'A');
            acronymNameMap.insert("VAL", 'V');
            acronymNameMap.insert("PHE", 'F');
            acronymNameMap.insert("PRO", 'P');
            acronymNameMap.insert("MET", 'M');
            acronymNameMap.insert("ILE", 'I');
            acronymNameMap.insert("LEU", 'L');
            acronymNameMap.insert("ASP", 'D');
            acronymNameMap.insert("GLU", 'E');
            acronymNameMap.insert("GLY", 'G');
            acronymNameMap.insert("LYS", 'K');
            acronymNameMap.insert("ARG", 'R');
            acronymNameMap.insert("SER", 'S');
            acronymNameMap.insert("THR", 'T');
            acronymNameMap.insert("TYR", 'Y');
            acronymNameMap.insert("HIS", 'H');
            acronymNameMap.insert("CYS", 'C');
            acronymNameMap.insert("ASN", 'N');
            acronymNameMap.insert("GLN", 'Q');
            acronymNameMap.insert("TRP", 'W');
            // dna
            acronymNameMap.insert("DA", 'A'); 
            acronymNameMap.insert("DT", 'T'); 
            acronymNameMap.insert("DG", 'G'); 
            acronymNameMap.insert("DC", 'C'); 
        acronymNameMap.insert("A", 'A'); 
        acronymNameMap.insert("T", 'T'); 
        acronymNameMap.insert("G", 'G'); 
        acronymNameMap.insert("C", 'C'); 
        acronymNameMap.insert("U", 'U'); 



            // now we've initialized utility maps     
            initialized = true;
      }
}

PDBFormat::PDBParser::PDBParser(IOAdapter* _io) : io(_io), currentPDBLine(""), currentChainId(' ') {

      currentMoleculeId = 0;
      flagMultipleModels = false;
      flagAtomRecordPresent = false;
      flagSequenceRecordPresent = false;
}

#define READ_BUF_SIZE 4096


void PDBFormat::PDBParser::parseBioStruct3D( BioStruct3D& biostruct, TaskStateInfo& ti )
{
      
      QByteArray readBuff(READ_BUF_SIZE+1, 0);
      char* buf = readBuff.data();
      qint64 len = 0;
      
      while (!ti.cancelFlag && !ti.hasErrors()) {
            
            bool lineOk = true;

            len = io->readUntil(buf, READ_BUF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &lineOk);
            if (len == 0) {
                  break;
            }
            if (!lineOk) {
            ti.error = GB2::PDBFormat::tr("line_is_too_long");
                  return;
            }
            currentPDBLine = QString(QByteArray::fromRawData(buf, len));

            if (currentPDBLine.startsWith("HEADER")) {
                  parseHeader(biostruct, ti);
                  continue;
            }
            
            if (currentPDBLine.startsWith("COMPND")) {
                  parseCompound(biostruct, ti);
                  continue;
            }
            
        if (currentPDBLine.startsWith("DBREF ")) {
            parseDBRef(biostruct, ti);
            continue;
        }

            if (currentPDBLine.startsWith("SEQRES")) {
                  parseSequence(biostruct, ti);
                  continue;
            }
            
            if (currentPDBLine.startsWith("HELIX ") || currentPDBLine.startsWith("SHEET ")
                  || currentPDBLine.startsWith("TURN  ")) {
                        parseSecondaryStructure(biostruct, ti);
                        continue;
            }


            if (currentPDBLine.startsWith("ATOM  ") || currentPDBLine.startsWith("HETATM")) {
                  
            // Omit all models except of the first one
                  if (flagMultipleModels)
                        continue;
                parseAtom(biostruct, ti);
        
            continue;
            }

            if (currentPDBLine.startsWith("SPLIT ")) {
            //TODO: add SPLIT record support!
            ti.error = GB2::PDBFormat::tr("SPLIT_record_not_supported");
            break;
            }
            
            if (currentPDBLine.startsWith("ENDMDL")) {
            log.details(tr("Warning: ENDMDL tag not supported!"));
            flagMultipleModels = true;
                  continue;
            }
      }
      
    if (ti.cancelFlag || ti.hasErrors()) {
            return;
    }

      if (!flagAtomRecordPresent || !flagSequenceRecordPresent) {
        ti.error = GB2::PDBFormat::tr("mandatory_records_absent");
      }
}

void PDBFormat::PDBParser::parseHeader( BioStruct3D& biostruct, TaskStateInfo& ti )
{
      /*
      Record Format 
      COLUMNS DATA TYPE FIELD DEFINITION
      ------------------------------------------------------------------------------------ 
      1 - 6       Record name "HEADER" 
      11 - 50           Classifies the molecule(s). 
      51 - 59           Deposition date. This is the date the coordinates were received at the PDB. 
      63 - 66           This identifier is unique within the PDB.
      */
      
      if (currentPDBLine.length() < 66)
      {
        ti.error = GB2::PDBFormat::tr("invalid_header_record");
            return;
      }

      QString desc = currentPDBLine.mid(10,40).trimmed();
      QByteArray pdbId = currentPDBLine.mid(62,4).toAscii();
      biostruct.descr = desc;
      biostruct.pdbId = pdbId;
}


void PDBFormat::PDBParser::parseAtom( BioStruct3D& biostruct, TaskStateInfo& ti )
{     
      /*
      Record Format 
      
      COLUMNS           DEFINITION 
      1 - 6       Record name "ATOM " 
      7 - 11            Atom serial number. 
      13 - 16           Atom name. 
      17                Alternate location indicator. 
      18 - 20           Residue name. 
      22                Chain identifier. 
      23 - 26           Residue sequence number. 
      27                Code for insertion of residues. 
      31 - 38           Orthogonal coordinates for X in Angstroms. 
      39 - 46           Orthogonal coordinates for Y in Angstroms. 
      47 - 54           Orthogonal coordinates for Z in Angstroms. 
      55 - 60           Occupancy. 
      61 - 66           Temperature factor. 
      77 - 78           Element symbol, right-justified. 
      79 - 80           Charge on the atom. 
      */
      
      if (currentPDBLine.length() < 80)
      {
        ti.error = GB2::PDBFormat::tr("ivalid_atom_record");
            return;
      }
      
      if (!flagAtomRecordPresent)
        flagAtomRecordPresent = true;

    bool isHetero = false;
    if (currentPDBLine.startsWith("HETATM"))
        isHetero = true;

      int id = currentPDBLine.mid(6,5).toInt();
      QByteArray atomName = currentPDBLine.mid(12,4).toAscii();
    int residueIndex = currentPDBLine.mid(22,4).toInt();
      
      int atomicNumber = 0;
      QByteArray elementName = currentPDBLine.mid(76,2).toAscii().trimmed();
      atomicNumber = PDBFormat::getElementNumberByName(elementName);
      
      QChar chainId = currentPDBLine.at(21);
      int chainIndex = getChainIndexByName(chainId);
    if (chainIndex == 0) {
        Q_ASSERT(isHetero);
    } else if (!residueStartIndexMap.contains(chainIndex)) {
        // These chains are not mentioned in DBREF record
        int startIndex = residueIndex;
        if (unreferencedResidueStartIndexMap.contains(chainIndex)) {
            startIndex = unreferencedResidueStartIndexMap.value(chainIndex);
        } else {
            unreferencedResidueStartIndexMap.insert(chainIndex, startIndex);
        }
        residueIndex = residueIndex - startIndex + 1; 
    }
    
    
    
      double x,y,z;
      x = currentPDBLine.mid(30,8).toDouble();
      y = currentPDBLine.mid(38,8).toDouble();
      z = currentPDBLine.mid(46,8).toDouble();
      double occupancy = currentPDBLine.mid(54,6).toDouble();
      double temperature = currentPDBLine.mid(60,6).toDouble();
      
      SharedAtom a(new AtomData);
      a->chainIndex = chainIndex;
      a->residueIndex = residueIndex;
      a->atomicNumber = atomicNumber;
      a->name = atomName;
      a->coord3d = Vector3D(x,y,z);
      a->occupancy = occupancy;
      a->temperature = temperature;
      
    if (isHetero && chainIndex == 0) {
        biostruct.hetAtomMap.insert(id, a);
        return;
    }
        

      SharedMolecule mol = biostruct.moleculeMap.take(chainIndex);
    mol->atomMap.insert(id, a);
    biostruct.moleculeMap.insert(chainIndex, mol);

}

void PDBFormat::PDBParser::parseSecondaryStructure( BioStruct3D& biostruct, TaskStateInfo& ti )
{
      /*
      
      Record Format Examples
      
      1         2         3
      0123456789012345678901234567890123456
      HELIX    1  H1 ILE      7  LEU     18
      HELIX    2  H2 PRO     19  PRO     19
      HELIX    3  H3 GLU     23  TYR     29
      HELIX    4  H4 THR     30  THR     30
      SHEET    1  S1 2 THR     2  CYS     4
      SHEET    2  S2 2 CYS    32  ILE    35
      SHEET    3  S3 2 THR    39  PRO    41
      TURN     1  T1 GLY    42  TYR    44
      
      */
      
      SecondaryStructureData::Type structureType = SecondaryStructureData::TYPE_NONE;
      int startIndex;
      int endIndex;
      int startChainIDIndex;
      int endChainIDIndex;
      
      
      if (currentPDBLine.startsWith("HELIX ")) {
            structureType = SecondaryStructureData::TYPE_HELIX;
            startChainIDIndex = 19;
            endChainIDIndex = 31;
            startIndex = 21;
            endIndex = 33;
      } else if (currentPDBLine.startsWith("SHEET ")) {
            structureType = SecondaryStructureData::TYPE_STRAND;
            startChainIDIndex = 21;
            endChainIDIndex = 32;
            startIndex = 22;
            endIndex = 33;
      } else if (currentPDBLine.startsWith("TURN  ")) {
            // TODO: is this is obsolete?
        structureType = SecondaryStructureData::TYPE_TURN;
            startChainIDIndex = 19;
            endChainIDIndex = 30;
            startIndex = 20;
            endIndex = 31;
      } else {
            Q_ASSERT(0);
            return;
      }
      
      if (currentPDBLine.length() < endIndex + 4)
      {
        ti.error = GB2::PDBFormat::tr("invalid_secondary_structure_record");
            return;
      }
      
      QChar startChainID = currentPDBLine.at(startChainIDIndex);
      QChar endChainID = currentPDBLine.at(endChainIDIndex);

    Q_ASSERT(startChainID == endChainID);
      int chainIndex = getChainIndexByName(startChainID);
    Q_ASSERT(chainIndex != 0);
    int startSequenceNumber = currentPDBLine.mid(startIndex, 4).toInt();
      int endSequenceNumber = currentPDBLine.mid(endIndex, 4).toInt();
            

      SharedSecondaryStructure secStruct( new SecondaryStructureData );
      secStruct->type = structureType;
      secStruct->chainIndex = chainIndex;
    secStruct->startSequenceNumber = startSequenceNumber;
      secStruct->endSequenceNumber = endSequenceNumber;
      biostruct.secondaryStructures.append(secStruct);

}

void PDBFormat::PDBParser::parseCompound(BioStruct3D& biostruct, TaskStateInfo& ti) 
{
      /*
      Record Format
      
      COLUMNS DATA TYPE FIELD DEFINITION
      1 - 6 Record name "COMPND" 
      8 - 10      Continuation
      11 - 70 Description of the molecular components.
      */

      const char* idTag = "MOL_ID";
      const char* nameTag = "MOLECULE";
      const char* chainsTag = "CHAIN";
//    const char* engTag = "ENGINEERED";
      const char* savedTag = "SAVED_LINE";
      const char* delimiterTag = ";";
      const char* transitionTag = "-";
      QByteArray specLine = currentPDBLine.mid(10).trimmed().toAscii();

    //Q_UNUSED(engTag);
      
      bool saveRequired = false;
    
    if (specLine == "NULL") {
        ti.error = GB2::PDBFormat::tr("compound_record_is_empty");
        return;
    }

    if (currentMoleculeDescr.contains(savedTag)) {
            specLine.prepend(currentMoleculeDescr.value(savedTag).toString().toAscii());
            currentMoleculeDescr.remove(savedTag);
      }
      
      if (specLine.endsWith(transitionTag)) {
          // look forward to guess if it is not an end of record
        // (it could be, for example 1V6C)
        QByteArray nextLine = getNextSpecLine();
        if (!nextLine.startsWith("COMPND")) {
            //end of record detected
            int lastCharIndex = specLine.length() - 1;
            specLine.remove(lastCharIndex,1);
        } else {
             int length = specLine.length();
            // omit transition tag when saving specLine
                currentMoleculeDescr.insert(savedTag, specLine.left(length - 1));
                return;
        }
      }

    if (!specLine.endsWith(delimiterTag)) {
        // look forward to guess if transition required
        QByteArray nextLine = getNextSpecLine();
        if (nextLine.startsWith("COMPND")) {
            // this is transition, save this line
            currentMoleculeDescr.insert(savedTag, specLine);
            return;
        }  else {
            saveRequired = true;
        }
    }

      if (specLine.startsWith(idTag) ) {        
        if (!currentMoleculeDescr.isEmpty()) {
            saveRequired = true;
        }
        QByteArray molId = getSpecValue(specLine, idTag);
        currentMoleculeDescr.insert(idTag, molId);
    } else if (specLine.startsWith(nameTag)) {
            QByteArray name = getSpecValue(specLine, nameTag);
            currentMoleculeDescr.insert(nameTag, name);
      } else if (specLine.startsWith(chainsTag)) {
            QByteArray chainIds = getSpecValue(specLine, chainsTag);
            currentMoleculeDescr.insert(chainsTag, chainIds);
      }

    if (saveRequired) {
        //TODO: Try to use this index from PDB Entry
        //int index = currentMoleculeDescr.value(idTag).toInt();
        //Q_UNUSED(index);
        QString name =  currentMoleculeDescr.value(nameTag).toString();
        QString buffer = currentMoleculeDescr.value(chainsTag).toString();
        for( int j = 0; j < buffer.length(); ++j) {
            QChar chainIdentifier = buffer.at(j);
            if (chainIdentifier.isLetterOrNumber()) {
                int molId = chainIndexMap.size() + 1;
                chainIndexMap.insert(chainIdentifier, molId);
                MoleculeData* data = new MoleculeData();
                data->name = name;
                biostruct.moleculeMap.insert( molId, SharedMolecule(data) );
            }
        }
        Q_ASSERT(chainIndexMap.size() > 0);
        if (chainIndexMap.size() == 0) {
            ti.error = GB2::PDBFormat::tr("invalid_compound_record");
            return;
        }
        currentMoleculeDescr.clear();
    }
 
}


void PDBFormat::PDBParser::parseSequence( BioStruct3D& biostruct, TaskStateInfo& ti )
{
      /*
      Record Format 

      COLUMNS           DEFINITION 
      1 - 6       Record name "SEQRES" 
      8 - 10            Integer serNum Serial number of the SEQRES record for the current chain. 
                        Starts at 1 and increments by one each line. Reset to 1 for each chain. 
      12                Character chainID Chain identifier. This may be any single legal character,
                        including a blank which is is used if there is only one chain. 
      14 - 17           Integer numRes Number of residues in the chain. 
                        This value is repeated on every record.
      20 - ...    Residues
      */
      

      if (currentPDBLine.length() < 24 /* at least one residue */)
      {
        ti.error = GB2::PDBFormat::tr("invalid_seqres_less_then_24");
            return;
      }

      flagSequenceRecordPresent = true;
      
      QChar chainId = currentPDBLine.at(11);
    int chainIndex = getChainIndexByName(chainId);
    Q_ASSERT(chainIndex != 0);
    if (chainIndex == 0)
    {
        ti.error = GB2::PDBFormat::tr("invalid_seqres_chain_index_not_found");
        return;
    }
    
      if (currentChainId != chainId) {
            currentChainId = chainId;
      }
      
      QStringList residues = currentPDBLine.mid(19).split(QRegExp("\\s+"), QString::SkipEmptyParts);
      QString name;
    SharedMolecule mol = biostruct.moleculeMap.take(chainIndex);
    Q_ASSERT(mol);
    foreach (name, residues ) {
        SharedResidue residue( new ResidueData );
            residue->name = name.toAscii();
            residue->chainIdentifier = chainId;
            residue->acronym = PDBFormat::getAcronymByName(name.toAscii());
        if (residue->acronym == 'X') {
            log.details(tr("Warning: unknown residue name: %1").arg(residue->name.constData()));
        }
            residue->chainIndex = chainIndex;
            int residueIndex =  0;
        if (residueStartIndexMap.contains(chainIndex)) {
            residueIndex = residueStartIndexMap.value(chainIndex) + mol->residueMap.size();
        } else {
            residueIndex = 1 +  mol->residueMap.size();
        }
        mol->residueMap.insert(residueIndex, residue);
      }
    biostruct.moleculeMap.insert(chainIndex, mol);


}


QByteArray PDBFormat::PDBParser::getSpecValue( const QByteArray& specLine, const QByteArray& valueName )
{
      const char* delimTag(";");
      int startIndex = specLine.indexOf(valueName) + valueName.length() + 1; 
      int lastIndex = specLine.indexOf(delimTag);
      return specLine.mid(startIndex, lastIndex - startIndex).trimmed();
}

int PDBFormat::getElementNumberByName( const QByteArray& elementName )
{
      if (atomNumMap.contains(elementName)) 
            return atomNumMap.value(elementName);
      else 
            return 0;
}

double inline PDBFormat::getAtomCovalentRadius( int atomicNumber )
{
    Q_ASSERT(atomicNumber < NUM_ELEMENTS);
    return atomRadiusTable[atomicNumber];
        
}


int PDBFormat::PDBParser::getChainIndexByName( const QChar& chainId )
{
      if (chainIndexMap.contains(chainId)) 
            return chainIndexMap.value(chainId); 
      else
            return 0;
}

void PDBFormat::PDBParser::parseDBRef( BioStruct3D& biostruct, TaskStateInfo& ti )
{
    /* Record Format
     
    COLUMNS       DATA TYPE     FIELD              DEFINITION
    1 -  6       Record name   "DBREF "
    8 - 11       IDcode        idCode             ID code of this entry.
    13            Character     chainID            Chain  identifier.
    15 - 18       Integer       seqBegin           Initial sequence number of the PDB sequence segment.
    19            AChar         insertBegin        Initial  insertion code of the PDB  sequence segment.
    21 - 24       Integer       seqEnd             Ending sequence number of the PDB  sequence segment.
    25            AChar         insertEnd          Ending insertion code of the PDB  sequence segment.
    27 - 32       LString       database           Sequence database name. 
    34 - 41       LString       dbAccession        Sequence database accession code.
    43 - 54       LString       dbIdCode           Sequence  database identification code.
    56 - 60       Integer       dbseqBegin         Initial sequence number of the database segment.
    61            AChar         idbnsBeg           Insertion code of initial residue of the segment, if PDB is the reference.
    63 - 67       Integer       dbseqEnd           Ending sequence number of the database segment.
    68            AChar         dbinsEnd           Insertion code of the ending residue of the segment, if PDB is the reference.
    */

    Q_UNUSED(biostruct);
    Q_UNUSED(ti);
    QChar chainIdentifier = currentPDBLine.at(12);
    int chainIndex = getChainIndexByName(chainIdentifier);
    if (chainIndex == 0) {
        log.details(tr("Wanning: unknown chain index in dbref!"));
        return;
    } 


    int startIndex = currentPDBLine.mid(14,4).toInt();
    residueStartIndexMap.insert(chainIndex, startIndex);

}

QByteArray PDBFormat::PDBParser::getNextSpecLine()
{
    QByteArray readBuf(READ_BUF_SIZE+1, 0);
    char* buf = readBuf.data();
    bool lineOk;
    int len = io->readUntil(buf, READ_BUF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &lineOk);
    QByteArray line = QByteArray::fromRawData(buf, len);
    // retrieve back ioAdapter position
    io->skip(-len);
    return readBuf;

}
QChar PDBFormat::getAcronymByName( const QByteArray& name )
{     
      if (acronymNameMap.contains(name)) 
            return acronymNameMap.value(name); 
      else
            return QChar('X');

}

void PDBFormat::fillBioStruct3DAnnotationTable( AnnotationTableObject* ao, const BioStruct3D& bioStruct )
{
    
    foreach (SharedAnnotationData sdata, bioStruct.annotations) {
        ao->addAnnotation(new Annotation(sdata), bioStruct.pdbId);
    } 
}

DNASequenceObject* PDBFormat::createSequenceObject(const QByteArray& sequence, const QByteArray& sequenceName) {
    DNAAlphabetRegistry* r = AppContext::getDNAAlphabetRegistry();
    DNAAlphabet* al = r->findAlphabets(sequence, true).first();
    return new DNASequenceObject(sequence, al, sequenceName);
}

static inline bool existsCovalentBond( double r1, double r2, double distance ) {
    static const double tolerance = 0.45;

    if (distance <= r1 + r2 + tolerance) {
        return true;
    }

    return false;
}



void PDBFormat::calculateBonds( BioStruct3D& bioStruct )
{
    // maxDisance = 2*MaxCovalentRadius + tolerance
    static const double maxRadius = 2.0;
    static const double maxDistance = 2*maxRadius + 0.45;

    //quint64 num_comps = 0, firstFilter = 0, secondFilter = 0;
    clock_t t1 =  clock();
    QMap<int, SharedMolecule>::iterator molIter;
    for (molIter = bioStruct.moleculeMap.begin(); molIter != bioStruct.moleculeMap.end(); ++molIter) {
       SharedMolecule& mol = molIter.value();
       QMap< int, SharedAtom>::const_iterator i1,i2;
       QMap<int, SharedAtom>::const_iterator constEnd = mol->atomMap.constEnd();
       for (i1 = mol->atomMap.constBegin(); i1 != constEnd; ++i1) {
           const SharedAtom& a1 = i1.value();
           double r1 = getAtomCovalentRadius(a1->atomicNumber); 
           i2 = i1;
           for (++i2; i2 != constEnd; ++i2) {
               //++num_comps;
               const SharedAtom& a2 = i2.value();
               if ( ( qAbs(a2->coord3d.x - a1->coord3d.x) > maxDistance ) ||
                    ( qAbs(a2->coord3d.y - a1->coord3d.y) > maxDistance ) ||
                    ( qAbs(a2->coord3d.z - a1->coord3d.z) > maxDistance )  ) {
                   continue;
               }
               //++firstFilter;
               if ( (qAbs(a2->coord3d.x - a1->coord3d.x) + qAbs(a2->coord3d.y - a1->coord3d.y) + 
                   qAbs(a2->coord3d.z - a1->coord3d.z) ) > maxDistance ) {
                   continue;
               }
               //++secondFilter;
               double distance = (a1->coord3d - a2->coord3d).length();
               double r2 = getAtomCovalentRadius(a2->atomicNumber);
               if (existsCovalentBond(r1, r2, distance)) {
                     mol->bonds.append(Bond( a1, a2 ));
               }
            }
        }
    }
    clock_t t2 = clock();
    logPerf.trace("PDB bonds calculation time: " + QString::number((float)( t2 - t1)/ CLOCKS_PER_SEC ));
    //logPerf.trace(QString("num comparisons: %1").arg(num_comps));
    //logPerf.trace(QString("passed through first filter: %1").arg(firstFilter));
    //logPerf.trace(QString("passed through second filter: %1").arg(secondFilter));

   
}

} //namespace

Generated by  Doxygen 1.6.0   Back to index