Code
--[[
* Atom Database v 1.21
* Original author Brow42 Mar. 24, 2012
* Information about each atom in each AA according to atom number.
* API (all in the fsl.atom namespace)
* Many functions accept option 2 or 3 boolean arguments, which indicate
* if the segment is the first and/or last segment in a polypeptide.
* The third argument boolean is true if the segment is disulfide bonded.
* These arguments can be replaced by a call to IsTerminal(). If omitted,
* IsTerminal will be called automatically, except for GetExpectedCount,
* which is used to modify the db value.
* Most functions accept either a segment number or AA code as the first argument.
* _CountAtoms() is the only useful function for ligands ('M' structure). Other
* functions will return nil if passed a ligand segment.
* _CountAtoms(number iSeg) -- SLOW manual count all the atoms in the segment. You should use structure.GetAtomCount instead.
* GetExpectedCount(number or string iSeg, isFirst, isLast, isDisulfideBonded) -- total EXPECTED atoms in segment or AA, given flags, default false
* IsTerminal(number iSeg) -- returns bool,bool if start or end of a polypeptide
* GetBackboneHeavyAtoms(number or string iSeg, isFirst, isLast) -- range of atoms on the backbone
* GetSidechainHeavyAtoms(number or string iSeg, isFirst, isLast) -- range of atoms on the sidechain (nil for glycine)
* GetDonorAtoms(number or string iSeg, isFirst, isLast) -- list of donor atoms
* GetAcceptorAtoms(number or string iSeg, isFirst, isLast) -- list of acceptor atoms
* GetPolarHydrogens(number or string iSeg, isFirst, isLast) -- list of polar hydrogen atoms
* _IsDisulfideBonded(number iSeg) -- true if bonded to another cysteine
* _IsTerminalTest(aa,count,disulfide) -- If already called CountAtoms(), don't call IsTerminal(), call this instead.
* _NumberOrCode(number or string iSeg) -- performs a table look up given segment number or AA code.
* Test1() -- Count all atoms, compare with table, perform terminal segment check, for all segments
* Test2(mode) -- Band all atoms of a particular class. mode = sc, bb, donor, polar, acceptor
* db -- Reference table of atom numbers for each class. Should not be accessed directly since bonding changes atom counts.
* atomcount -- table of just the expected total atom counts
* Version 1.1 Brow42 May 14, 2012
*
* CountAtoms renamed to _CountAtoms
* CountAtoms is now a wrapper for structure.GetAtomCount
* GetCount renamed to GetExpectedCount and does not auto-lookup flags
* IsTerminal is now called by the various Get* functions if isFirst = nil
* A small table of nominal atom counts has been added to the top of the file, and the functions
* needed to determine if the AA is bonded also moved to the front and only require this small
* table. This is so if all you need is IsTerminal, you don't need the entire library.
* Version 1.2 Brow42 April 22, 2014
*
* Added Functions:
* GetAtom(number seg, number atom, isFirst, isLast, isDisulfideBonded) -- returns string,string
* first is O,C,H,S or nil (if not a standard amino acid, a ligand, or proline atom 8)
* second is a,d,b,p,n,v = acceptor, donor, both, polar h, non-polar, virtual atom
* Test3() -- mutate to all 20 AA and call GetAtom on all the atoms.
* GetHydrogenAtoms(number or string iSeg) -- list of hydrogens (polar and non-polar)
* Corrected first hydrogen of proline to be 9.
* Note that atom 8 of proline is not a real atom (although it is very close to atom 1)
* Version 1.21 Brow41 Oct 23, 2015
*
* Hist polar hydrogen move from 17 to 15.
--]]
unpack = unpack or table.unpack -- for 5.2
-- ========================== Begin AA Atom Database ================
fsl = fsl or {} -- make sure fsl namespace exists
fsl.atom = {}
-- Take just these 4 things if all you need is to know if the sidechain starts at 5 or 6 (tail terminal = true)
fsl.atom.atomcount = { -- shorter table just for identifying terminals
a=10, c=11, d=12, e=15, f=20, g=7, h=17, i=19, k=22, l=19,
m=17, n=14, p=15, q=17, r=24, s=11, t=14, v=16, w=24, y=21
}
-- Pass in a segment number of a cysteine
function fsl.atom._IsDisulfideBonded(iSeg)
local s = current.GetSegmentEnergySubscore(iSeg,'disulfides')
return tostring(s) ~= '-0'
end
function fsl.atom._IsTerminalTest(aa,count,disulfide)
local dbvalue = fsl.atom.atomcount[aa]
if dbvalue == nil then error('Bad argument to an fsl.atom function (not an amino acid code)') end
local diff = count - dbvalue
if disulfide then diff = diff + 1 end -- because count is one less because a H was removed
if diff == 0 then return false, false, disulfide
elseif diff == 1 then return false, true, disulfide
elseif diff == 2 then return true, false, disulfide
elseif diff == 3 then return true, true, disulfide
end
error('Strange atom count, report this!')
end
-- Determine if segment is start or end of polypeptide by looking for excess atoms
function fsl.atom.IsTerminal(iSeg)
if structure.GetSecondaryStructure(iSeg) == 'M' then return false,false,false end
local aa = structure.GetAminoAcid(iSeg)
local count = structure.GetAtomCount(iSeg)
return fsl.atom._IsTerminalTest(aa,count,fsl.atom._IsDisulfideBonded(iSeg))
end
-- ====== now the rest of Atom Tables =====
fsl._nul = {} -- don't need so many CONSTANT empty tables
-- This is a table of atom classifications for AAs in a 'standard' state
-- The query functions apply adjustments due to bonds that change atom count
-- the atom element field will be filled in later ({} vs. constant fsl._nul entries)
fsl.atom.db = {} -- to save typing, these are stored as an array { polar-H, donor, acceptor, total count }
fsl.atom.db['a'] = { {6}, fsl._nul, fsl._nul, fsl._nul, 10 } -- alanine
fsl.atom.db['c'] = { {7}, fsl._nul, fsl._nul, {}, 11 } -- cysteine -- NB 10 if S-S bonded!
fsl.atom.db['d'] = { {9}, fsl._nul, {7,8}, {}, 12} -- aspartate
fsl.atom.db['e'] = { {10}, fsl._nul, {8,9}, {}, 15 } -- glutamate
fsl.atom.db['f'] = { {12}, fsl._nul, fsl._nul, fsl._nul, 20 } -- phenylalanine
fsl.atom.db['g'] = { {5}, fsl._nul, fsl._nul, fsl._nul, 7 } -- glycine
fsl.atom.db['h'] = { {11,15}, {10}, {7}, {}, 17 } -- histidine
fsl.atom.db['i'] = { {9}, fsl._nul, fsl._nul, fsl._nul, 19 } -- isoleucine
fsl.atom.db['k'] = { {10,20,21,22}, {9}, fsl._nul, {}, 22 } -- lysine
fsl.atom.db['l'] = { {9}, fsl._nul, fsl._nul, fsl._nul, 19 } -- leucine
fsl.atom.db['m'] = { {9}, fsl._nul, fsl._nul, {}, 17 } -- methionine
fsl.atom.db['n'] = { {9,13,14}, {8}, {7}, {}, 14 } -- asparagine
fsl.atom.db['p'] = { fsl._nul, fsl._nul, fsl._nul, {}, 15 } -- proline
fsl.atom.db['q'] = { {10,16,17}, {9}, {8}, {}, 17 } -- glutamine
fsl.atom.db['r'] = { {12,20,21,22,23,24}, {8, 10, 11}, fsl._nul, {}, 24 } -- arginine
fsl.atom.db['s'] = { {7,11}, {6}, {6}, {}, 11 } -- serine
fsl.atom.db['t'] = { {8,11}, {6}, {6}, {}, 14 } -- theronine
fsl.atom.db['v'] = { {8}, fsl._nul, fsl._nul, fsl._nul, 16 } -- valine
fsl.atom.db['w'] = { {15,20}, {9}, fsl._nul, {}, 24 } -- tryptophan
fsl.atom.db['y'] = { {13,21}, {12}, {12}, fsl._nul, 21 } -- tyrosine
-- add aliass for the table entries
for i,v in pairs(fsl.atom.db) do
v.polarh, v.donor, v.acceptor, v.atom, v.count = unpack(v)
end
-- supplement for atom identification (inline construction doesn't work for numbers)
-- unlisted atoms are either carbon or hydrogen (except proline 8, which is virtual)
fsl.atom.db['c'].atom[6] = 'S'
fsl.atom.db['d'].atom[7] = 'O'
fsl.atom.db['d'].atom[8] = 'O'
fsl.atom.db['e'].atom[8] = 'O'
fsl.atom.db['e'].atom[9] = 'O'
fsl.atom.db['h'].atom[7] = 'N'
fsl.atom.db['h'].atom[10] = 'N'
fsl.atom.db['k'].atom[9] = 'N'
fsl.atom.db['m'].atom[7] = 'S'
fsl.atom.db['n'].atom[7] = 'O'
fsl.atom.db['n'].atom[8] = 'N'
--fsl.atom.db['p'].atom[8] = 'N'
fsl.atom.db['q'].atom[8] = 'O'
fsl.atom.db['q'].atom[9] = 'N'
fsl.atom.db['r'].atom[8] = 'N'
fsl.atom.db['r'].atom[10] = 'N'
fsl.atom.db['r'].atom[11] = 'N'
fsl.atom.db['s'].atom[6] = 'O'
fsl.atom.db['t'].atom[6] = 'O'
fsl.atom.db['w'].atom[9] = 'N'
function fsl.atom.CountAtoms(iSeg) return structure.GetAtomCount(iSeg) end
fsl.atom._lastatomerr = 'atom number out of bounds'
-- Find out how many atoms in a segment. SLOW!
-- We do this by trying to band atoms until we get an error
function fsl.atom._CountAtoms(iSeg)
local function LastAtom(errmsg)
return string.find(errmsg,fsl.atom._lastatomerr) ~= nil
end
local s2,s3,n
n = structure.GetCount()
if iSeg == 1 then s2,s3 = 2,3
elseif iSeg == n then s2,s3 = n-1, n-2
else s2,s3 = iSeg-1,iSeg+1
end
local function zlb(iSeg, iAtom)
local rc,iBand = pcall(band.Add,iSeg,s2,s3, 0.01,0,0, iAtom)
if rc == false then error(iBand) end
if iBand == 0 then error(string.format('Unknown Band Add Error on seg %d atom %d',iSeg,iAtom)) end
band.Delete(iBand)
end
local iAtom = 0
while true do
iAtom = iAtom + 1
rc, err = pcall(zlb,iSeg, iAtom)
if rc == false then break end -- out of atoms, OR some other error
end -- found the correct atom
if not LastAtom(err) then error(err) end
return iAtom - 1
end
-- returns nil if it's a ligand, else returns the DB entry and aa code
-- This function lets us pass either the number of an actual segment, or
-- just look up the properties by AA code. It also handles ligand segments and
-- upper/lower case input
function fsl.atom._NumberOrCode(iSeg)
local aa
if type(iSeg) == 'number' then
local type = structure.GetSecondaryStructure(iSeg)
if type == 'M' then return end
aa = structure.GetAminoAcid(iSeg)
else aa = string.lower(iSeg)
end
local c = fsl.atom.db[aa]
if c == nil then error('Bad argument to an fsl.atom function (not an amino acid code)') end
return c, aa
end
-- iSeg is a segment number or amino acid letter code
function fsl.atom.GetExpectedCount(iSeg, isFirst, isLast, isDisulfideBonded)
local db = fsl.atom._NumberOrCode(iSeg)
if db == nil then return fsl.atom.CountAtoms(iSeg) end
if isFirst == nil and type(iSeg) == 'number' then isFirst, isLast = fsl.atom.IsTerminal(iSeg) end
local c = db.count
if isFirst then c = c + 2 end
if isLast then c = c + 1 end
if isDisulfideBonded then c = c - 1 end -- S-H + H-S -> S-S + 2H
return c
end
-- Get a list of backbone heavy atoms always (1,4) or (1,5)
function fsl.atom.GetBackboneHeavyAtoms(iSeg, isFirst, isLast)
local db = fsl.atom._NumberOrCode(iSeg)
if db == nil then return end
if isFirst == nil and type(iSeg) == 'number' then isFirst, isLast = fsl.atom.IsTerminal(iSeg) end
return isLast and {1, 2, 3, 4, 5} or { 1, 2, 3, 4}
end
-- Sidechain heavy atoms, starting with C-beta
-- Returns nil for ligands
function fsl.atom.GetSidechainHeavyAtoms(iSeg, isFirst, isLast)
local db, aa = fsl.atom._NumberOrCode(iSeg)
if db == nil then return end
if isFirst == nil and type(iSeg) == 'number' then isFirst, isLast = fsl.atom.IsTerminal(iSeg) end
local a,b
if aa == 'g' then return {} -- g has no sidechain
elseif aa == 'p' then a,b = 5,7 -- p no polar hydrogen to tell us this range
else a,b = 5, db.polarh[1]-1 -- everything else has sidechain followed by a polar hydrogen in table
end
if isLast then a,b = a+1,b+1 end -- shift due to terminal O at 5
local list = {}
for i = a,b do list[#list + 1] = i end
return list
end
-- Get list of donor atoms
-- Returns nil for ligands
function fsl.atom.GetDonorAtoms(iSeg, isFirst, isLast)
local db,aa = fsl.atom._NumberOrCode(iSeg)
if db == nil then return nil end
if isFirst == nil and type(iSeg) == 'number' then isFirst, isLast = fsl.atom.IsTerminal(iSeg) end
local list
if aa == 'p' then list = {} -- {1} -- is this true?!
else list = {1} -- the backbone N
end
local shift = 0
if isLast then shift = 1 end -- shift due to terminal O, all remaining donors are on sidechain
for i = 1,#db.donor do list[#list+1] = db.donor[i] + shift end
return list
end
-- Get list of acceptor atoms
-- Returns nil for ligands
function fsl.atom.GetAcceptorAtoms(iSeg, isFirst, isLast)
local db = fsl.atom._NumberOrCode(iSeg)
if db == nil then return nil end
if isFirst == nil and type(iSeg) == 'number' then isFirst, isLast = fsl.atom.IsTerminal(iSeg) end
local list = { 4 } -- The O attached to the backbone C'
local shift = 0
if isLast then
shift = 1
list = {4 , 5} -- 2 O on the terminus
end
-- all remaining acceptors are on the sidechain
for i = 1,#db.acceptor do list[#list+1] = db.acceptor[i] + shift end
return list
end
-- Get list of polar hydrogen atoms
-- Returns nil for ligands
function fsl.atom.GetPolarHydrogens(iSeg, isFirst, isLast)
local db,aa = fsl.atom._NumberOrCode(iSeg)
if db == nil then return nil end
if isFirst == nil and type(iSeg) == 'number' then isFirst, isLast = fsl.atom.IsTerminal(iSeg) end
local list = {}
local tmp, first -- our list of H, and where the first H will go
if aa == 'p' then tmp, first = {}, 9 -- no table, so hardcoded
else tmp, first = db.polarh, db.polarh[1] -- table provided, first element is first polar hydrogen
end
local shift = 0
if isLast then shift = 1 end -- all H are after the sidechain
first = first + shift -- Shifted by terminal O
if isFirst then
shift = shift + 2 -- Shift the table up after the 2 added H
list = { first, first + 1 } -- Add 2 terminal H
end
for i = 1,#tmp do list[#list+1] = tmp[i] + shift end
return list
end
-- Get list of hydrogen atoms
-- Returns nil for ligands
function fsl.atom.GetHydrogenAtoms(iSeg, isFirst, isLast, isDisulfide)
local db,aa = fsl.atom._NumberOrCode(iSeg)
if db == nil then return nil end
if isFirst == nil and type(iSeg) == 'number' then isFirst, isLast, isDisulfide = fsl.atom.IsTerminal(iSeg) end
local list = {}
local first -- where the first H will go
if aa == 'p' then first = 9 -- no table, so hardcoded
else first = db.polarh[1] -- table provided, first element is first polar hydrogen
end
local shift = 0
if isLast then shift = 1 end -- all H are after the sidechain
first = first + shift -- Shifted by terminal O
local n_atoms = db.count
if isFirst then n_atoms = n_atoms + 2 end
if isLast then n_atoms = n_atoms + 1 end
if isDisulfide then n_atoms = n_atoms - 1 end
for i = first,n_atoms do list[#list+1] = i end
return list
end
-- Get the element and function of an atom
-- Returns two string results (see top of file for explanation)
-- only first two arguments are required
function fsl.atom.GetAtom(iSeg, atom, isFirst, isLast, isDisulfide)
local bb = { {'N','d'}, {'C','n'}, {'C','n'}, {'O','a'} } -- backbone atoms (0 same as 2)
local db,aa = fsl.atom._NumberOrCode(iSeg)
if db == nil or atom < 0 then return nil,nil end
if atom == 0 then atom = 2 end
if isFirst == nil and type(iSeg) == 'number' then isFirst, isLast, isDisulfide = fsl.atom.IsTerminal(iSeg) end
local n_atoms = db.count
if isFirst then n_atoms = n_atoms + 2 end
if isLast then n_atoms = n_atoms + 1 end
if isDisulfide then n_atoms = n_atoms - 1 end
if atom > n_atoms then return nil,nil end -- atom value out of range
if atom <= 4 then return unpack(bb[atom]) end
local shift = 0
if isLast then
if atom==5 then return "O","a" end -- the extra O- on the oxygen terminal
atom = atom - 1 -- else adjust atom number to skip over this atom which isn't in the table
end
local first,last -- where the first H will go and the last heavy atom
if aa == 'p' then
if atom == 8 then return nil,'v' end -- atom 8 is a virtual atom
first,last = 9,7 -- no table, so hardcoded
else first,last = db.polarh[1],db.polarh[1]-1 -- table provided, first element is first polar hydrogen
end
if atom <= last then -- determine sidechain heavy atom element and function
local element,role = 'C','n' -- default sidechain atom
if db.atom[atom] then
element = db.atom[atom] -- exceptions are stored by atom number
-- only non-carbons are acceptor or donors so we run this loop only for those
local donor,acceptor = false,false
for i=1,#db.donor do
if db.donor[i] == atom then donor = true break end
end
for i=1,#db.acceptor do
if db.acceptor[i] == atom then acceptor = true break end
end
if donor then role = 'd' end
if acceptor then role = 'a' end
if donor and acceptor then role = 'b' end
end
return element,role
end
-- only hydrogen case now
local shift = 0
if isFirst then
shift = 2
end
if first <= atom and atom <= first + shift then
return 'H','p' -- the extra two polar hydrogens on N-terminal residue
end
for i=1,#db.polarh do
if db.polarh[i] == atom-shift then return 'H','p' end
end
return 'H','n' -- the rest of the hydrogens are non-polar
end
-- Count all the atoms of every segment and check if terminal
function fsl.atom.Test1()
local list = {}
for i,v in pairs(fsl.atom.db) do list[i] = true end
local n = structure.GetCount()
for i = 1, n do
local ss = structure.GetSecondaryStructure(i)
if ss == 'M' then print ('Ligand',i,fsl.atom.CountAtoms(i))
else
local j = fsl.atom.CountAtoms(i)
local aa = structure.GetAminoAcid(i)
print('AA',aa,i,j, fsl.atom.GetExpectedCount(aa),
fsl.atom._IsTerminalTest(aa,j,fsl.atom._IsDisulfideBonded(i)))
list[aa] = nil
end
end
print('Missing AAs:')
for i,v in pairs(list) do print(i) end
end
-- Band either all SC atoms, all Donor atoms, all Acceptor atoms, or all polar H
-- Automatically checks for terminal segments
-- mode = sc, bb, donor, polar, acceptor
function fsl.atom.Test2(mode)
local s2,s3,n
n = structure.GetCount()
local function zlb(iSeg, iAtom)
local rc,iBand = pcall(band.Add,iSeg,s2,s3, 0.01,0,0, iAtom)
if rc == false then error(iBand) end
if iBand == 0 then error(string.format('Unknown Band Add Error on seg %d atom %d',iSeg,iAtom)) end
end
if mode == 'sc' then func = fsl.atom.GetSidechainHeavyAtoms
elseif mode == 'donor' then func = fsl.atom.GetDonorAtoms
elseif mode == 'acceptor' then func = fsl.atom.GetAcceptorAtoms
elseif mode == 'polar' then func = fsl.atom.GetPolarHydrogens
elseif mode == 'bb' then func = fsl.atom.GetBackboneHeavyAtoms
else error('Unrecognized mode string')
end
for i = 1,n do
if i == 1 then s2,s3 = 2,3
elseif i == n then s2,s3 = n-1, n-2
else s2,s3 = i-1,i+1
end
local ss = structure.GetSecondaryStructure(i)
if ss ~= 'M' then
local list = func(i,fsl.atom.IsTerminal(i)) -- this is how you autodetect terminal and disulfide flags
for j = 1,#list do zlb(i,list[j]) end
end
end
end
function fsl.atom.Test3() --- mutate to all residues then dump their atoms (use a design puzzle without layer filter)
selection.SelectAll()
structure.SetAminoAcidSelected('g')
selection.DeselectAll()
seg = 2
for aa,_ in pairs(fsl.atom.atomcount) do
structure.SetAminoAcid(seg,aa)
print(aa:upper())
for i = 1,structure.GetAtomCount(seg) do
print(i,fsl.atom.GetAtom(seg,i))
end
print()
seg = seg + 1
end
end
-- ========================== End AA Atom Database ================
fsl.atom.Test3()