
#include "pdbReader.h"


// Constructor
//   Remembers the alphabet pointer it is given and starts with every
//   name buffer (filename, path, fullName, structureName) unset.
PDBReader::PDBReader(Alphabet* alpha)
  : alphabet(alpha),
    filename(0),
    path(0),
    fullName(0),
    structureName(0)
{
  // Nothing else to do; the names are filled in by the setters.
}


// Destructor
//   Releases the name buffers held by this reader. The setters in this
//   file allocate each of them with new char[], so the array form of
//   delete is required (scalar delete on an array is undefined behavior).
//   delete[] on a null pointer is a no-op, so unset names need no check.
//   The alphabet pointer is not released here.
PDBReader::~PDBReader() {

  delete[] filename;
  delete[] path;
  delete[] fullName;
  delete[] structureName;
}


// setFilename
//   Set the name for the structure file to be read;
//   If the initial name does not correspond to a
//   readable file, checks are done to see if adding
//   suffixes .pdb or .ent will return a valid file
//
//   fn:      candidate file name (NUL-terminated); a null pointer is
//            treated as a failed lookup
//   returns: 1 if one of <fn>, <fn>.pdb, <fn>.ent passed checkFullName()
//            (filename, fullName and the structure name are then set),
//            0 otherwise (filename and fullName are left unset)
int PDBReader::setFilename(char* fn) {

  // Copy the incoming name BEFORE releasing the old buffers, so a caller
  // that passes our own filename back in does not read freed memory.
  // The buffer leaves room for a 4-character suffix plus the terminator.
  size_t len = 0;
  char* candidate = 0;
  if (fn != 0) {
    len = strlen(fn);
    candidate = new char[len+5];
    strncpy(candidate,fn,len);
    candidate[len] = '\0';
  }

  // Buffers come from new char[], so they must go through delete[]
  delete[] filename;
  filename = candidate;

  // fullName is always rebuilt by checkFullName() for the new name
  delete[] fullName;
  fullName = 0;

  if (filename == 0) {
    return 0;
  }

  // Try the name as given first, then with each known suffix
  static const char* const suffixes[] = { "", ".pdb", ".ent" };
  const int suffixCount = 3;

  for (int i = 0; i < suffixCount; i++) {
    filename[len] = '\0';            // cut back to the bare name
    strncat(filename,suffixes[i],4);

    if (checkFullName() == 1) {
      // Always setNameFromFilename, people can change the name later
      setNameFromFilename();
      return 1;
    }
  }

  // Nothing matched
  delete[] filename;
  filename = 0;

  return 0;
}


// setPath
//   Store a private copy of the directory prefix that checkFullName()
//   puts in front of the filename. An already-built fullName is not
//   touched here.
//
//   p:       NUL-terminated path prefix
//   returns: 1 on success, 0 if p is null (the current path is kept)
int PDBReader::setPath(char* p) {

  if (p == 0) {
    return 0;
  }

  // Build the copy first and release the old buffer afterwards, so that
  // passing our own path back in cannot read freed memory.
  const size_t len = strlen(p);
  char* copy = new char[len+1];
  strncpy(copy,p,len);
  copy[len] = '\0';

  delete[] path;   // allocated with new char[]; delete[] of null is a no-op
  path = copy;

  return 1;
}


// getStructure
//   Read the file named by fullName and build a Structure from its
//   backbone atoms, stopping after the first line that starts with "TER".
//
//   A line contributes an element when it starts with "ATOM" or "HETA",
//   the text at column offset 13 matches the alphabet's backbone string,
//   and the residue field at offsets 17-19 maps to a symbol of the
//   alphabet. When the 3-character residue number read at offset 23 does
//   not follow the previous line's number by exactly one, the distance to
//   the previously stored coordinate decides whether the atom is added
//   (> 3.0) or treated as a duplicate and dropped.
//
//   Warnings are appended to a file called "pdbError" in the working
//   directory; if that file cannot be opened they are silently skipped.
//
//   structIndex: currently unused
//   returns:     a Structure allocated with new (nothing here frees it),
//                or 0 when no file name is set or the file cannot be opened
Structure* PDBReader::getStructure(int structIndex) {

  // fopen(NULL, ...) is undefined; no resolved name means nothing to read
  if (fullName == 0) {
    return 0;
  }

  FILE* infile = fopen(fullName,"r");
  if (infile == NULL) {
    return 0;
  }

  // May be NULL (e.g. read-only directory); every use below is guarded
  FILE* fpError = fopen("pdbError","a");

  int len = getElementCount();

  Structure* struc = new Structure(len, alphabet);
  if (structureName != 0) {
    struc->setName(structureName);
  }

  char line[1024];      // current input line
  char resSeqStr[16];   // residue number field
  char symbolStr[4];    // residue name field
  symbolStr[3] = '\0';
  char* bb = alphabet->getBackbone();
  int bbLen = strlen(bb);
  Coordinate3D* tempCoord = 0;
  int pdbIndex = 0;     // index within pdb file
  int pdbIndexOld = 0;
  int elementCount = 0;
  int flag = 0;

  while (flag == 0) {
    // Zero-fill so the fixed-column reads below see NULs, not leftovers
    // from a previous longer line, when the current line is short.
    memset(line,0,sizeof(line));

    // Stop on EOF or read error; testing feof() before reading would
    // process the final line a second time.
    if (fgets(line,1023,infile) == NULL) {
      break;
    }

    if (strncmp(line,"TER",3) == 0) {
      flag++;
    }

    strncpy(symbolStr,line+17,3);
    // Check if pdb residue info is 3-letter or 1-letter code
    Symbol* symbol = 0;
    if ( symbolStr[0] == ' ' &&
         symbolStr[1] == ' ' ) {
      symbol = alphabet->getSymbol(symbolStr[2]);
    }
    else if ( symbolStr[0] == ' ' &&
              symbolStr[2] == ' ' ) {
      symbol = alphabet->getSymbol(symbolStr[1]);
    }
    else if ( symbolStr[1] == ' ' &&
              symbolStr[2] == ' ' ) {
      symbol = alphabet->getSymbol(symbolStr[0]);
    }
    else {
      // 3-letter case
      symbol = alphabet->getSymbol(symbolStr);
    }

    // Note: only the first four characters of "HETATM" are compared
    if ( ( strncmp(line,"ATOM",4) == 0 ||
           strncmp(line,"HETATM",4) == 0 ) &&
         ( strncmp(line+13,bb,bbLen) == 0 ) &&
         ( symbol != 0 ) ) {

      strncpy(resSeqStr,line+23,3);
      resSeqStr[3]='\0';
      pdbIndex=charToInt(resSeqStr);

      // Replace the scratch coordinate from the previous atom
      if (tempCoord != 0) {
        delete tempCoord;
      }
      tempCoord = readCoordinate3DFromFile(line);

      if (tempCoord == 0) {
        if (fpError != NULL) {
          fprintf(fpError,"WARNING: In file %s, could not read element %d\n",filename,pdbIndex);
        }
      }
      else if (elementCount > 0 && pdbIndex-pdbIndexOld != 1) {
        // Numbering gap or repeat: let the geometry decide
        if (fpError != NULL) {
          fprintf(fpError,"elementCount: %d\n",elementCount);
          fprintf(fpError,"WARNING: In file %s, CA neighbors %d and %d\n",filename,pdbIndexOld,pdbIndex);
        }
        // Pointer handed out by the structure; it is not freed here
        Coordinate3D* tempCoordOld = struc->getCoordinate(elementCount-1);
        float distance = tempCoord->getDistanceTo(tempCoordOld);
        if (distance > 3.0) {
          if (fpError != NULL) {
            fprintf(fpError,"CA distance = %1.3f > 3.000, so assuming consecutive residues\n",distance);
          }
          struc->addElement(symbol,tempCoord);
          elementCount++;
        }
        else {
          if (fpError != NULL) {
            fprintf(fpError,"CA distance = %1.3f < 3.000, so assuming duplicate residues\n",distance);
          }
        }
      }
      else {
        struc->addElement(symbol,tempCoord);
        elementCount++;
      }
    }

    pdbIndexOld = pdbIndex;
  }

  fclose(infile);
  if (fpError != NULL) {
    fclose(fpError);
  }

  // NOTE(review): assumes getBackbone() returns a new char[] copy owned
  // by the caller (the original code also released it) - confirm in Alphabet.
  delete[] bb;
  delete tempCoord;

  return struc;
}


// getNextStructure
//   Placeholder: no structure is produced, a null pointer is
//   always returned.
Structure* PDBReader::getNextStructure() {
  Structure* none = 0;
  return none;
}


// getElementCount
//   Count the backbone atom lines in the file named by fullName, up to
//   and including the first line that starts with "TER". A line counts
//   when it starts with "ATOM" or "HETA" and the text at column offset 13
//   matches the alphabet's backbone string.
//
//   returns: the number of matching lines, or 0 when no file name is
//            set or the file cannot be opened
int PDBReader::getElementCount() {

  // fopen(NULL, ...) is undefined; no resolved name means nothing to count
  if (fullName == 0) {
    return 0;
  }

  FILE* infile = fopen(fullName,"r");
  if (infile == NULL) {
    return 0;
  }

  char line[1024];   // stack buffer: nothing to leak on any return path

  int count = 0;
  int done = 0;
  char* bb = alphabet->getBackbone();
  int bbLen = strlen(bb);

  while (done == 0) {
    // Zero-fill so the read at offset 13 sees NULs on a short line
    memset(line,0,sizeof(line));

    // Loop on the fgets result so a final line without a trailing
    // newline is still counted.
    if (fgets(line,1023,infile) == NULL) {
      break;
    }

    if (strncmp(line,"TER",3) == 0) {
      done++;
    }
    // Note: only the first four characters of "HETATM" are compared
    if ( ( strncmp(line,"ATOM",4) == 0 ||
           strncmp(line,"HETATM",4) == 0 ) &&
         ( strncmp(line+13,bb,bbLen) == 0 ) ) {
      count++;
    }
  }
  fclose(infile);

  // NOTE(review): assumes getBackbone() returns a new char[] copy owned
  // by the caller (the original code also released it) - confirm in Alphabet.
  delete[] bb;

  return count;
}


// readCoordinate3DFromFile
//   Extract the X, Y and Z fields from one line of text. Each field is
//   8 characters wide and they start at offsets 30, 38 and 46; each is
//   converted with charToFloat().
//
//   str:     NUL-terminated line
//   returns: a Coordinate3D allocated with new, or 0 when str is null or
//            ends before the Z field starts (so no field is read from
//            beyond the terminator)
Coordinate3D* PDBReader::readCoordinate3DFromFile(char* str) {

  static const int fieldStart[3] = { 30, 38, 46 };   // X, Y, Z
  const int fieldWidth = 8;

  // Every field must begin inside the string; strncpy then stops at the
  // terminator if a field is cut short.
  if (str == 0 || strlen(str) <= static_cast<size_t>(fieldStart[2])) {
    return 0;
  }

  float xyz[3];
  char field[9];   // fieldWidth characters + terminator

  for (int axis = 0; axis < 3; axis++) {
    strncpy(field,str+fieldStart[axis],fieldWidth);
    field[fieldWidth]='\0';
    xyz[axis] = charToFloat(field);
  }

  return new Coordinate3D(xyz[0],xyz[1],xyz[2]);
}


// checkFullName
//   Make sure that fullName corresponds to an
//   accessible file
//
//   If fullName is not set yet it is built here: a copy of filename when
//   no path is set, otherwise path immediately followed by filename (no
//   separator is inserted between them). The resulting name must pass
//   checkPdbFile().
//
//   returns: 1 if fullName is set and passed the check;
//            0 if filename is unset or the check failed, in which case
//            fullName is left unset
int PDBReader::checkFullName() {

  if (filename == 0) {
    return 0;
  }

  if (fullName == 0) {
    const size_t filenameLen = strlen(filename);

    if (path == 0) {
      // Check before allocating, so a failure leaves nothing to free
      if (checkPdbFile(filename) == 0) {
        return 0;
      }
      fullName = new char[filenameLen + 1];
      strncpy(fullName,filename,filenameLen);
      fullName[filenameLen] = '\0';
      return 1;
    }

    // path + filename
    const size_t pathLen = strlen(path);
    fullName = new char[pathLen + filenameLen + 1];
    strncpy(fullName,path,pathLen);
    strncpy(fullName + pathLen,filename,filenameLen);
    fullName[pathLen + filenameLen] = '\0';
  }

  // Either just built from path + filename, or already set earlier
  if (checkPdbFile(fullName) == 0) {
    delete[] fullName;   // allocated with new char[]
    fullName = 0;
    return 0;
  }

  return 1;
}


// checkPdbFile
//   Decide whether a file looks like a usable structure file: it must
//   open for reading and contain at least one line starting with "ATOM"
//   or "HETA" (only the first four characters of "HETATM" are compared).
//
//   file:    NUL-terminated file name
//   returns: 1 if such a line was found, 0 otherwise (including a null
//            name, a file that cannot be opened, or an empty file)
int PDBReader::checkPdbFile(char* file) {

  if (file == 0) {
    return 0;
  }

  FILE* infile = fopen(file,"r");
  if (infile == NULL) {
    // Nothing was opened, so there is nothing to close; fclose(NULL)
    // is undefined behavior.
    return 0;
  }

  char line[1024];
  int found = 0;

  // Loop on the fgets result so the buffer is only inspected after a
  // successful read (an empty file never touches it).
  while (found == 0 && fgets(line,1023,infile) != NULL) {
    if (strncmp(line,"ATOM",4) == 0 ||
        strncmp(line,"HETATM",4) == 0 ) {
      found = 1;
    }
  }
  fclose(infile);

  return found;
}


// setNameFromFilename
//   Set name from the filename minus ".pdb" or ".ent" suffixes
//
//   Trailing suffixes are stripped repeatedly, so "x.pdb.ent" gives "x".
//   The comparison is case-sensitive.
//
//   returns: 1 if structureName was set;
//            0 if filename is unset (structureName is left as it was) or
//            if nothing remains after stripping (structureName is cleared)
int PDBReader::setNameFromFilename() {

  if (filename == 0) {
    return 0;
  }

  // Release the old name and null the pointer right away, so the failure
  // return below cannot leave a dangling pointer for the destructor.
  delete[] structureName;
  structureName = 0;

  // Length of the name once the suffixes are cut off. A suffix is only
  // looked for while at least four characters remain, which keeps the
  // comparison inside the buffer.
  size_t nameLen = strlen(filename);
  while ( nameLen >= 4 &&
          ( strncmp(filename+(nameLen-4),".pdb",4) == 0 ||
            strncmp(filename+(nameLen-4),".ent",4) == 0 ) ) {
    nameLen -= 4;
  }

  if (nameLen == 0) {
    return 0;
  }

  structureName = new char[nameLen+1];
  strncpy(structureName,filename,nameLen);
  structureName[nameLen] = '\0';

  return 1;
}
