-- Code
--[[
Lig Detector -- detect presence of (possibly spurious) "ligands"
Some Foldit puzzles include a "ligand", a non-protein attachment.
A ligand can be a single metal atom, or a small molecule with 50
or more atoms.
The Foldit convention is that any ligands are the last segments
of the puzzle, with a secondary structure code "M", for "molecule".
At one point in time, the "auto structures" tool was broken, and would
incorrectly mark the last segment as a ligand. This defect has since
been fixed. Lig Detector was created to identify these spurious ligands.
Many recipes avoid working on ligands. Typically, these recipes scan
backwards from the last segment, looking for structure code "M".
At least one much older puzzle had a ligand in the middle of the
protein, which would break this approach.
Several recent Foldit puzzles have included small molecule ligands,
such as the aflatoxin puzzles 1440 and 1445, and the "Y1 receptor"
puzzle 1446. The "classroom" puzzle 1446 included a small molecule
ligand and three metal atom ligands.
Most ligands are locked and can't be modified. In 1446, the ligand
was unlocked and had multiple "rotamers", different shapes that it
could assume. Recipes which avoided this ligand were missing the
main goal of the puzzle.
Lig Detector looks for all possible ligands, and reports the segment
position of each one found. The recipe also checks to see whether
the ligand is locked, and looks at the atom count, rotamer count, and
score to determine whether the ligand may be modifiable.
The recipe also looks at the amino acid code of the ligand, which is
usually "unk" for a true ligand. Any ligands with a standard amino
acid code are suspect, and possibly indicate the return of the auto
structures bug.
There are some quirks in testing for modifiable ligands. Some ligands
have an unlocked backbone, but locked sidechains. If you try to drag
a ligand of this type, you'll see a "sidechain is locked" message.
The Foldit Lua function structure.IsLocked reports only on locked
backbone.
Using a recipe, there is no way to tell whether a sidechain is locked.
(It's also a good question as to whether a ligand even *has* a sidechain,
but apparently, from Foldit's point of view, it does.)
Another quirk is single atoms. These ligands appear just as small spheres.
They may represent calcium, zinc, copper, or perhaps another metal.
The Foldit Lua function structure.GetAtomCount may return a number
greater than 1 for these ligands. For example, in puzzle 1443, the
single-atom ligands reported an atom count of 5. Apparently extras
are "virtual atoms", representing atoms which are expected to bond to
the metal. In 1443, the single-atom ligands were zinc, and each was
expected to be bonded to the sulfur atoms of four adjacent cysteines.
This recipe is based in part on "print protein lua v0" by marie_s
An earlier version used spvincent's structure detection logic. This
version is based in part on Jean-Bob's ligand detection code, although
modified to scan the entire protein.
version history
---------------
Lig Detector 1.0 - 2015/05/19 - LociOiling
Lig Detector 1.1 - 2017/11/11 - LociOiling
* refine ultra-paranoiac approach, looking at AA code of ligand
* display obsessive details about each ligand in light of recent puzzles
* dispense with ranges of ligands, treat each one as an individual
]]--
--
-- globals section
--
--
-- recipe identity; ReVersion is the combined "name version" string
--
Recipe = "Lig Detector"
Version = "1.1"
ReVersion = Recipe .. " " .. Version
--
-- segment counts, both start at zero and are filled in by LigDetector
--
segCnt, segCnt2 = 0, 0
--
-- positions of the fields in each ligand table entry,
-- in the order LigDetector stores them
--
LTSEG,  LTLOCK, LTAA   = 1, 2, 3    -- segment number, locked flag, AA code
LTATOM, LTROTA, LTSCOR = 4, 5, 6    -- atom count, rotamer count, score
LTMOD,  LTODD,  LTDUB  = 7, 8, 9    -- modifiable, mid-protein, dubious flags
--
-- running totals of modifiable, mid-protein, and dubious ligands
--
lModable, lOddball, lDubious = 0, 0, 0
--
-- the 20 proteinogenic amino acids, keyed by one-letter code;
-- each entry holds the three-letter abbreviation and the full name
--
AminoAcids = {
    [ "a" ] = { "Ala", "Alanine" },
    [ "c" ] = { "Cys", "Cysteine" },
    [ "d" ] = { "Asp", "Aspartate" },
    [ "e" ] = { "Glu", "Glutamate" },
    [ "f" ] = { "Phe", "Phenylalanine" },
    [ "g" ] = { "Gly", "Glycine" },
    [ "h" ] = { "His", "Histidine" },
    [ "i" ] = { "Ile", "Isoleucine" },
    [ "k" ] = { "Lys", "Lysine" },
    [ "l" ] = { "Leu", "Leucine" },
    [ "m" ] = { "Met", "Methionine" },
    [ "n" ] = { "Asn", "Asparagine" },
    [ "p" ] = { "Pro", "Proline" },
    [ "q" ] = { "Gln", "Glutamine" },
    [ "r" ] = { "Arg", "Arginine" },
    [ "s" ] = { "Ser", "Serine" },
    [ "t" ] = { "Thr", "Threonine" },
    [ "v" ] = { "Val", "Valine" },
    [ "w" ] = { "Trp", "Tryptophan" },
    [ "y" ] = { "Tyr", "Tyrosine" },
}
--
-- end of globals section
--
--
-- round: cut a number down to three decimal places for display
--
-- despite the name, the part below 0.001 is dropped, not rounded
-- to nearest; a nil argument yields the string "nil"
--
local function round ( x )
    if x ~= nil then
        local excess = x % 0.001
        return x - excess
    end
    return "nil"
end
--
-- LigDetector: ultra-paranoid method for detecting ligands
--
-- the current method is adapted from DetectLigand,
-- originally by Jean-Bob with an assist from Bruno Kestemont
--
--
-- scans every segment for secondary structure "M" and returns a list
-- with one entry per ligand segment; each entry is indexed by the LT*
-- constants: segment, locked, AA code, atom count, rotamer count,
-- score, modifiable, mid-protein ("odd"), dubious
--
-- side effects: sets the globals segCnt and segCnt2, recounts
-- lModable, lOddball, and lDubious from zero, and prints a summary
-- of each ligand found
--
function LigDetector ()
    segCnt = structure.GetCount ()
    segCnt2 = segCnt
    --
    -- start the totals over, so calling this twice doesn't double them
    --
    lModable = 0
    lOddball = 0
    lDubious = 0
    --
    -- do it the time-honored way: back up over the trailing "M"
    -- segments; stop before segment 0 if every segment is "M"
    --
    while segCnt2 > 0
    and structure.GetSecondaryStructure ( segCnt2 ) == "M" do
        segCnt2 = segCnt2 - 1
    end
    --
    -- scan the whole protein
    --
    local ligandList = {}
    for ii = 1, segCnt do
        if structure.GetSecondaryStructure ( ii ) == "M" then
            --
            -- collect detailed info about each ligand segment
            --
            local llocked = structure.IsLocked ( ii )
            local laa = structure.GetAminoAcid ( ii )
            local latom = structure.GetAtomCount ( ii )
            local lrota = rotamer.GetCount ( ii )
            local lscor = current.GetSegmentEnergyScore ( ii )
            local lmod = false
            local lodd = false
            local ldub = false
            --
            -- decide whether ligand itself is modifiable
            -- (even if not modifiable, ligand may interact with neighboring protein)
            --
            -- more than 5 atoms, because single-atom ligands have been
            -- seen to report an atom count of 5
            --
            if not llocked
            and latom > 5
            and lrota > 1
            and lscor ~= 0
            then
                lmod = true
                lModable = lModable + 1
            end
            --
            -- look for non-standard ligand in the middle:
            -- anything before the last non-ligand segment
            --
            if ii < segCnt2 then
                lodd = true
                lOddball = lOddball + 1
            end
            --
            -- look for doubtful ligand with valid AA code
            --
            if AminoAcids [ laa ] ~= nil then
                ldub = true
                lDubious = lDubious + 1
            end
            ligandList [ #ligandList + 1 ] =
                { ii, llocked, laa, latom, lrota, lscor, lmod, lodd, ldub, }
        end
    end
    print ( #ligandList .. " ligand segments" )
    for ii = 1, #ligandList do
        local lig = ligandList [ ii ]
        print (
            "ligand "
                .. ii ..
            " at segment "
                .. lig [ LTSEG ] ..
            ", locked = "
                .. tostring ( lig [ LTLOCK ] ) ..
            ", aa = \""
                .. lig [ LTAA ] ..
            "\", atoms = "
                .. lig [ LTATOM ] ..
            ", rotamers = "
                .. lig [ LTROTA ] ..
            ", score = "
                .. round ( lig [ LTSCOR ] ) ..
            ", modifiable = "
                .. tostring ( lig [ LTMOD ] )
            )
        --
        -- report on this ligand's own position, not the first ligand's
        --
        if not lig [ LTODD ] then
            print ( "normal ligand, located at end of protein" )
        else
            print ( "non-standard ligand, ligand section not at end" )
            print ( "most ligand-aware recipes will not work correctly" )
        end
    end
    return ligandList
end
--
-- ShowReport: show the ligand scan results in a dialog
--
-- ligandList is a list of ligand entries indexed by the LT* constants,
-- as returned by LigDetector; the summary counts come from the
-- globals lModable, lOddball, and lDubious
--
function ShowReport ( ligandList )
    local LIGAND = "ligand"
    local LIGANDS = "ligands"
    --
    -- pick the singular or plural noun to go with a count
    --
    local function noun ( count )
        if count > 1 then
            return LIGANDS
        end
        return LIGAND
    end
    local ask = dialog.CreateDialog ( ReVersion )
    local nsegs = structure.GetCount ()
    ask.l05 = dialog.AddLabel ( nsegs .. " segments" )
    if #ligandList == 0 then
        ask.l10 = dialog.AddLabel ( "No ligands detected" )
    else
        ask.l10 = dialog.AddLabel (
            #ligandList .. " " .. noun ( #ligandList ) .. " found" )
        if lModable > 0 then
            ask.l15 = dialog.AddLabel (
                lModable .. " modifiable " .. noun ( lModable ) .. " found" )
        end
        if lOddball > 0 then
            ask.l20 = dialog.AddLabel (
                lOddball .. " mid-protein " .. noun ( lOddball ) .. " found" )
        end
        if lDubious > 0 then
            ask.l25 = dialog.AddLabel (
                lDubious .. " suspect " .. noun ( lDubious ) .. " with valid AA codes" )
        end
        ask.sep = dialog.AddLabel ( "--------------------" )
        for jj = 1, #ligandList do
            local lig = ligandList [ jj ]
            local lockstat = "unlocked"
            if lig [ LTLOCK ] then
                lockstat = "locked"
            end
            ask [ "lig" .. jj ] = dialog.AddLabel (
                "ligand # "
                    .. jj ..
                ", segment "
                    .. lig [ LTSEG ] ..
                ", "
                    .. lockstat ..
                ", score = "
                    .. round ( lig [ LTSCOR ] )
                )
            if lig [ LTMOD ] then
                ask [ "lig" .. jj .. "mod" ] = dialog.AddLabel (
                    "modifiable, atoms = "
                        .. lig [ LTATOM ] ..
                    ", rotamers = "
                        .. tostring ( lig [ LTROTA ] )
                    )
            else
                --
                -- list every reason the ligand failed the modifiable
                -- test; these mirror the four conditions LigDetector checks
                --
                local reasons = {}
                if lig [ LTLOCK ] then
                    reasons [ #reasons + 1 ] = "locked"
                end
                if lig [ LTROTA ] <= 1 then
                    reasons [ #reasons + 1 ] = "single rotamer"
                end
                if lig [ LTATOM ] <= 5 then
                    reasons [ #reasons + 1 ] = "single atom"
                end
                if lig [ LTSCOR ] == 0 then
                    reasons [ #reasons + 1 ] = "zero score"
                end
                ask [ "lig" .. jj .. "nmod" ] = dialog.AddLabel (
                    "not modifiable: " .. table.concat ( reasons, ", " ) )
            end
            if lig [ LTODD ] then
                ask [ "lig" .. jj .. "odd" ] = dialog.AddLabel (
                    "WARNING: position "
                        .. lig [ LTSEG ] ..
                    " not at end of protein "
                    )
            end
            if lig [ LTDUB ] then
                ask [ "lig" .. jj .. "dub" ] = dialog.AddLabel (
                    "WARNING: doubtful ligand, AA code \""
                        .. lig [ LTAA ] ..
                    "\" = "
                        .. AminoAcids [ lig [ LTAA ] ] [ 2 ]
                    )
            end
            ask [ "sep" .. jj ] = dialog.AddLabel ( "--------------------" )
        end
    end
    ask.OK = dialog.AddButton ( "OK", 1 )
    dialog.Show ( ask )
end
--
-- main: print the recipe banner, scan for ligands, show the report
--
function main ()
    print ( ReVersion )
    local puzzleName = puzzle.GetName ()
    print ( "Puzzle: " .. puzzleName )
    local trackName = ui.GetTrackName ()
    print ( "Track: " .. trackName )
    --
    -- detect the ligands and hand the list straight to the report
    --
    ShowReport ( LigDetector () )
    --
    -- normal exit also goes through cleanup, with no error message
    --
    cleanup ()
end
--
-- cleanup: final report, used both as the normal exit from main
-- and as the xpcall error handler
--
-- errmsg is nil on a normal exit, otherwise the error value;
-- the global CLEANUPENTRY makes sure the report is printed only once
--
function cleanup ( errmsg )
    if CLEANUPENTRY ~= nil then
        return
    end
    CLEANUPENTRY = true
    print ( "---" )
    --
    -- model 100 - print recipe name, puzzle, track, time, score, and gain
    --
    local reason
    local line, msg
    if errmsg == nil then
        reason = "complete"
    else
        --
        -- model 120 - civilized error reporting,
        -- thanks to Bruno K. and Jean-Bob
        --
        -- an error value need not be a string, so convert it first
        --
        errmsg = tostring ( errmsg )
        --
        -- split off the ":<line>: " prefix if there is one;
        -- line is the line number, msg is where the message text starts
        --
        local _
        _, _, line, msg = errmsg:find ( ":(%d+):%s()" )
        if msg ~= nil then
            errmsg = errmsg:sub ( msg, #errmsg )
        end
        if errmsg:find ( "Cancelled" ) ~= nil then
            reason = "cancelled"
        else
            reason = "error"
        end
    end
    print ( ReVersion .. " " .. reason )
    print ( "Puzzle: " .. puzzle.GetName () )
    print ( "Track: " .. ui.GetTrackName () )
    if reason == "error" then
        print ( "Unexpected error detected" )
        --
        -- line is nil when the message had no line number prefix;
        -- don't let the error handler itself fail on that
        --
        print ( "Error line: " .. ( line or "unknown" ) )
        print ( "Error: \"" .. errmsg .. "\"" )
    end
end
--
-- entry point: run main under xpcall, with cleanup as the error handler,
-- so an error raised in main is reported by cleanup
--
xpcall ( main, cleanup )
--- end of recipe