Code
--[[
AA Edit - get and set primary structure
The primary structure of a protein is the sequence of the
amino acids that make up the protein.
AA Edit displays the current primary structure as a sequence
of single-character amino acid codes. (Similar codes are
used for DNA and RNA, see "special cases" below.)
The displayed value can be selected and cut or copied.
The string of single-character codes is similar to the
FASTA format accepted by many protein related tools.
FASTA also allows for header information, which some tools,
such as BLAST, require.
If there are any "mutable" segments, the "Change" button
is displayed, and a new value can be pasted in. If there are
no mutable segments, any input to the "seq" box is ignored.
When the "Change" button is clicked, the currently displayed
primary structure is applied to the protein. The input amino
acid codes are converted to lower case.
The recipe checks each amino acid code against the list of 20
amino acids used in Foldit. Any input not found in the list is
ignored, and the corresponding segment is left unchanged.
Some puzzles have a mix of mutable and non-mutable segments.
The recipe does not attempt to change any non-mutable segments.
If the structure list is longer than the protein, AA Edit
discards the extra entries at the end of the list.
If the structure list is shorter than the protein, AA Edit
applies the list to the first *n* segments of the protein,
where *n* is the length of the list. Any remaining segments
are unchanged.
All changes are written to the scriptlog.
special cases
-------------
Some puzzles contain two or more separate protein chains.
The "insulin mutant" puzzle, which appears periodically as a
revisiting puzzle, is an example.
AA Edit detects the beginning and end of a protein chain by
checking the atom count. Each chain is presented separately,
identified by a chain id: "A", "B", "C", and so on.
All the normal rules apply to each chain.
Some puzzles have one or more ligands, each represented by a segment
which returns "x" or "unk" for its amino acid type. This code and
anything else not found in the normal list of 20 amino acids
is changed to "x" for the purposes of this recipe.
Segments with an "x" for their amino acid code in the replacement
string are not changed.
Each ligand is presented as a separate chain.
Very rarely, Foldit puzzles may contain RNA or DNA. These are chains
of nucleobases instead of amino acids. Each segment is one nucleobase.
Foldit uses two-character codes for RNA and DNA. AA Edit translates
these codes into single-character codes. The single-character codes are
ambiguous, for example, RNA adenine is code "ra" in Foldit, and DNA
adenine is "da". Both become "a" externally, which is also used
for alanine in a protein.
AA Edit treats each DNA or RNA section as a separate chain. This allows
it to keep the ambiguous codes straight.
The handling of RNA and DNA has only been tested for RNA. So far, RNA has
only appeared on one science puzzle, and the RNA was not mutable in
that puzzle. DNA has appeared only in intro puzzles, which don't allow
recipes. It's possible that problems may appear if there are ever
for-credit DNA or RNA puzzles again.
An even rarer case was found in puzzle 879, segment 134, and
puzzle 1378b, segment 30, where an amino acid was modified by
a glycan.
The code "unk" was used for these modified amino acids, but they
did not have the secondary structure code "M" used for ligands.
A modified amino acid like this is treated as protein, and does not break
the amino acid chain.
See "AA Copy Paste Compare v 1.1.1 -- Brow42" for
a full-function recipe that works with primary and
secondary structures.
version 1.2 -- 2016/12/23 -- LociOiling
* clone of PS Edit v1.2
* enable 1-step undo with undo.SetUndo ( false )
version 2.0 -- 2018/09/02 -- LociOiling
* detect and report multiple chains
* force filters on at beginning and end
* handle DNA and RNA, use single-letter codes externally
* refine scriptlog output, eliminate timing calls
version 2.0.1 -- 2020/04/16 -- LociOiling
* handle proline at N-terminal correctly
version 2.0.2 -- 2022/05/20 -- LociOiling
* handle cases where structure.GetAminoAcid throws an error
* handle lots of little peptides
* don't treat ligands as chains
* handle a binder target (or similar) with no C-term
* fix bug in setChain
version 2.0.3 -- 2023/01/08 -- LociOiling
* handle missing N terminal this time, a feature of the
ED Reconstruction puzzles (kludgistic fix)
version 2.1 -- 2023/05/09 -- LociOiling
* use distances to determine chains
* pageable display of chains
version 3.0 -- 2025/05/01 -- LociOiling
* refine chain detection logic
* skip mutables in general
]]--
--
-- Globals
--
-- recipe identification, printed to the scriptlog and used as dialog title
Recipe = "AA Edit"
Version = "3.0"
ReVersion = string.format ( "%s %s", Recipe, Version )

-- set to true by main as soon as one mutable segment is seen
mutable = false

--
-- lookup tables: external single-letter nucleobase code (table key)
-- to the two-letter code Foldit uses internally (value)
--
RNAin = { a = "ra", c = "rc", g = "rg", u = "ru" }
DNAin = { a = "da", c = "dc", g = "dg", t = "dt" }
--
-- common section used by all safe functions
--
safefun = {}
--
-- safefun.CommonError -- classify an error message caught by pcall
--
-- errmsg is the message string produced by a failed Foldit call.
--
-- Returns a numeric code followed by the unchanged message:
--
--    -1   segment index out of bounds
--    -2   atom number out of bounds, or unknown amino acid code
--    -3   band index out of bounds, or symmetry index out of bounds
--  -997   "bad argument" message reporting an argument type error
--  -998   wrong number of arguments
--     0   message not recognized
--
-- The codes are deliberately not unique; the caller is expected to know
-- which of the overlapping cases can apply to the function it wrapped.
-- The checks run in a fixed order and the first match wins.
--
safefun.CommonError = function ( errmsg )
    if errmsg:find ( "segment index out of bounds" ) then
        return -1, errmsg
    end
    --
    -- "bad argument" covers both type errors and some value errors;
    -- only the type errors ("... expected, got ...") are trapped here
    --
    local _, _, detail = errmsg:find ( "bad argument #%d+ to '%?' (%b())" )
    if detail ~= nil and detail:find ( "expected, got" ) then
        return -997, errmsg
    end
    --
    -- remaining checks are simple pattern / code pairs, tested in order
    --
    local checks = {
        { "Expected %d+ arguments.", -998 },
        { "atom number out of bounds", -2 },
        { "band index out of bounds", -3 },
        { "invalid argument, unknown aa code", -2 },
        { "symmetry index out of bounds", -3 },
    }
    for _, check in ipairs ( checks ) do
        if errmsg:find ( check [ 1 ] ) then
            return check [ 2 ], errmsg
        end
    end
    return 0, errmsg
end
--
-- end of common section used by all safe functions
--
--
-- structure.SafeGetAminoAcid -- protected call of structure.GetAminoAcid
--
-- All arguments are forwarded unchanged to structure.GetAminoAcid
-- through pcall.
--
-- Returns a numeric return code and a second value:
--
--    0    - successful, second value is whatever
--           structure.GetAminoAcid returned for the segment
--   -1    - bad segment index, second value is the error message
--   other - any other non-zero code assigned by safefun.CommonError,
--           second value is the error message
--   -999  - error not recognized by safefun.CommonError,
--           second value is the error message
--
structure.SafeGetAminoAcid = function ( ... )
    --
    -- forward the varargs directly; the Lua 5.0 style "arg" table
    -- is deprecated and loses arguments that follow a nil
    --
    local good, result = pcall ( structure.GetAminoAcid, ... )
    if good then
        return 0, result
    end
    --
    -- CommonError uses string methods, so make sure it gets a string
    -- even if the error object is something else
    --
    local crc, err2 = safefun.CommonError ( tostring ( result ) )
    if crc ~= 0 then
        return crc, err2
    end
    return -999, err2
end
--
-- GetAA -- amino acid code of a segment, never raises an error
--
-- seg is passed to structure.SafeGetAminoAcid; when that reports
-- success (return code 0) its second value is returned,
-- otherwise the string "unk" is returned.
--
function GetAA ( seg )
    local rc, code = structure.SafeGetAminoAcid ( seg )
    if rc == 0 then
        return code
    end
    return "unk"
end
protNfo = { -- protNfo--protNfo--protNfo--protNfo--protNfo--protNfo--protNfo
    --[[
        protNfo package version 0.7

        protNfo is packaged as a pseudo-class or pseudo-module
        containing a mix of data fields and functions

        all entries must be terminated with a comma to keep Lua happy
        the commas aren't necessary if only function definitions are present

        typical use: call protNfo:setNfo () once, then read the
        per-segment tables and the "chains" and "ligands" summaries

        versions
        --------
        0.3 - add chain detection from AA Edit 2.0
        0.4 - add ligand detection from GetSeCount
        0.4 - merges in the ligand logic from GetSeCount
        0.5 - still a work in progress
        0.6 - integrate AminoAcids table
        0.7 - trim the info collected, remove atom count logic
    ]]--
    --
    -- AminoAcids
    --
    -- names and key properties of all known amino acids and nucleobases
    --
    -- Notes:
    --
    -- * commented entries (at the end) are not in Foldit
    -- * one-letter amino acid code is the table key
    -- * two-letter RNA and DNA nucleotides are also valid
    -- * the fields in this table are now referenced by name
    -- * the "unk" and "x" codes are considered protein, unless the segment is marked as
    --   ligand in the secondary structure ( code "M" )
    -- * acref is the mid-chain atom count; it is kept for reference only,
    --   no function in this package reads it
    --
    AminoAcids = {
        a = { code = "a", ctype = "P", acref = 10, short = "Ala", long = "Alanine", hydrop = 1.8 },
        c = { code = "c", ctype = "P", acref = 11, short = "Cys", long = "Cysteine", hydrop = 2.5 },
        d = { code = "d", ctype = "P", acref = 12, short = "Asp", long = "Aspartate", hydrop = -3.5 },
        e = { code = "e", ctype = "P", acref = 15, short = "Glu", long = "Glutamate", hydrop = -3.5 },
        f = { code = "f", ctype = "P", acref = 20, short = "Phe", long = "Phenylalanine", hydrop = 2.8 },
        g = { code = "g", ctype = "P", acref = 7, short = "Gly", long = "Glycine", hydrop = -0.4 },
        h = { code = "h", ctype = "P", acref = 17, short = "His", long = "Histidine", hydrop = -3.2 },
        i = { code = "i", ctype = "P", acref = 19, short = "Ile", long = "Isoleucine", hydrop = 4.5 },
        k = { code = "k", ctype = "P", acref = 22, short = "Lys", long = "Lysine", hydrop = -3.9 },
        l = { code = "l", ctype = "P", acref = 19, short = "Leu", long = "Leucine", hydrop = 3.8 },
        m = { code = "m", ctype = "P", acref = 17, short = "Met", long = "Methionine ", hydrop = 1.9 },
        n = { code = "n", ctype = "P", acref = 14, short = "Asn", long = "Asparagine", hydrop = -3.5 },
        p = { code = "p", ctype = "P", acref = 15, short = "Pro", long = "Proline", hydrop = -1.6 },
        q = { code = "q", ctype = "P", acref = 17, short = "Gln", long = "Glutamine", hydrop = -3.5 },
        r = { code = "r", ctype = "P", acref = 24, short = "Arg", long = "Arginine", hydrop = -4.5 },
        s = { code = "s", ctype = "P", acref = 11, short = "Ser", long = "Serine", hydrop = -0.8 },
        t = { code = "t", ctype = "P", acref = 14, short = "Thr", long = "Threonine", hydrop = -0.7 },
        v = { code = "v", ctype = "P", acref = 16, short = "Val", long = "Valine", hydrop = 4.2 },
        w = { code = "w", ctype = "P", acref = 24, short = "Trp", long = "Tryptophan", hydrop = -0.9 },
        y = { code = "y", ctype = "P", acref = 21, short = "Tyr", long = "Tyrosine", hydrop = -1.3 },
        --
        -- codes for ligands or modified amino acids
        --
        x = { code = "x", ctype = "P", acref = 0, short = "Xaa", long = "Unknown", hydrop = 0 },
        unk = { code = "x", ctype = "P", acref = 0, short = "Xaa", long = "Unknown", hydrop = 0 },
        --
        -- bonus! RNA nucleotides
        --
        ra = { code = "a", ctype = "R", acref = 33, short = "a", long = "Adenine", hydrop = 0, },
        rc = { code = "c", ctype = "R", acref = 31, short = "c", long = "Cytosine", hydrop = 0, },
        rg = { code = "g", ctype = "R", acref = 34, short = "g", long = "Guanine", hydrop = 0, },
        ru = { code = "u", ctype = "R", acref = 30, short = "u", long = "Uracil", hydrop = 0, },
        --
        -- bonus! DNA nucleotides
        --
        da = { code = "a", ctype = "D", acref = 0, short = "a", long = "Adenine", hydrop = 0, },
        dc = { code = "c", ctype = "D", acref = 0, short = "c", long = "Cytosine", hydrop = 0, },
        dg = { code = "g", ctype = "D", acref = 0, short = "g", long = "Guanine", hydrop = 0, },
        dt = { code = "t", ctype = "D", acref = 0, short = "t", long = "Thymine", hydrop = 0, },
        --
        -- dusty attic! musty cellar! jumbled boxroom!
        -- can't bear to part with these treasures
        --
        -- b = { code = "b", ctype = "P", acref = 10, short = "Asx", long = "Asparagine/Aspartic acid", hydrop = 0 },
        -- j = { code = "j", ctype = "P", acref = 10, short = "Xle", long = "Leucine/Isoleucine", hydrop = 0 },
        -- o = { code = "o", ctype = "P", acref = 10, short = "Pyl", long = "Pyrrolysine", hydrop = 0 },
        -- u = { code = "u", ctype = "P", acref = 10, short = "Sec", long = "Selenocysteine", hydrop = 0 },
        -- z = { code = "z", ctype = "P", acref = 10, short = "Glx", long = "Glutamine or glutamic acid", hydrop = 0 } ,
    },
    aalist = {},    -- list of AA codes (keys of AminoAcids with ctype "P")
    rnalist = {},   -- list of RNA codes
    dnalist = {},   -- list of DNA codes
    Ctypes = {      -- printable names of the segment / chain types
        P = "protein",
        D = "DNA",
        R = "RNA",
        M = "ligand",
    },
    PROTEIN = "P",
    LIGAND = "M",
    RNA = "R",
    DNA = "D",
    UNKNOWN_AA = "x",
    UNKNOWN_BASE = "xx",
    HELIX = "H",
    SHEET = "E",
    LOOP = "L",     -- was "E", which duplicated SHEET
    segCnt = 0,     -- unadjusted segment count
    segCnt2 = 0,    -- segment count adjusted for terminal ligands
    aa = {},        -- amino acid codes
    ss = {},        -- secondary structure codes
    ACRF = 4.0,     -- alpha carbon reference distance (protein)
    PRF = 8.0,      -- phosphorus reference distance (RNA/DNA)
    acdx = {},      -- distance from segment ii to segment ii + 1
    ctype = {},     -- segment type - P, M, R, D
    first = {},     -- true if segment is first in chain
    last = {},      -- true if segment is last in chain
    fastac = {},    -- external code for FASTA-style output
    short = {},     -- short name
    long = {},      -- long name
    chainid = {},   -- chain id
    chainpos = {},  -- position in chain
    chains = {},    -- summary of chains
    ligands = {},   -- ligand table
    DEBUG = false,
    --
    -- round - drop everything past the third decimal place
    --
    -- note: this truncates toward negative infinity, it does not
    -- round to nearest
    --
    round = function ( ii )
        return ii - ii % 0.001
    end,
    --
    -- get a chain id for chain number ndx (1-based)
    --
    -- 1 - 26 give "A" - "Z", 27 - 702 give "AA" - "ZZ",
    -- anything beyond that returns "!!"
    --
    getchid = function ( ndx )
        local letters = { "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
                          "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z" }
        local chid = letters [ ( ndx - 1 ) % #letters + 1 ]
        local chquo = math.floor ( ( ndx - 1 ) / #letters )
        if chquo > 0 then
            if chquo <= #letters then
                --
                -- chquo is already 1-based here: 1 means prefix "A"
                --
                chid = letters [ chquo ] .. chid
            else
                chid = "!!"
            end
        end
        return chid
    end,
    getChains = function ( self )
        --
        -- getChains - build a table of the chains found
        --
        -- Most Foldit puzzles contain only a single protein (peptide) chain.
        -- A few puzzles contain ligands, and some puzzles have had two
        -- protein chains. Foldit puzzles may also contain RNA or DNA.
        --
        -- Chain boundaries are not detected here. This function relies on
        -- the "first" flags set by setNfo (type changes and distance gaps)
        -- and starts a new chain entry wherever first [ ii ] is true.
        --
        -- Each ligand is treated as a chain of its own, with a length of 1.
        --
        -- chain table entries
        -- -------------------
        --
        -- ctype - chain type - "P" for protein, "M" for ligand, "R" for RNA, "D" for DNA
        -- fasta - FASTA-format sequence, single-letter codes (does not include FASTA header)
        -- ss - secondary structure codes of the chain's segments
        -- start - Foldit segment number of sequence start
        -- stop - Foldit segment number of sequence end
        -- len - length of sequence
        -- chainid - chain id assigned to entry, "A", "B", "C", and so on
        --
        -- For DNA and RNA, fasta contains single-letter codes, so "a" for adenine.
        -- The codes overlap the amino acid codes (for example, "a" for alanine).
        -- The DNA and RNA codes must be converted to the appropriate two-letter codes Foldit
        -- uses internally, for example "ra" for RNA adenine and "da" for DNA adenine.
        --
        -- Also fills self.chainid and self.chainpos for every segment.
        --
        local chainz = {}
        local curchn = nil
        for ii = 1, self.segCnt do
            --
            -- the curchn == nil test guarantees a chain exists even if
            -- the first segment was not flagged
            --
            if self.first [ ii ] or curchn == nil then
                curchn = {
                    ctype = self.ctype [ ii ],
                    fasta = "",
                    ss = "",
                    start = ii,
                    stop = ii,
                    chainid = self.getchid ( #chainz + 1 ),
                    len = 0,
                }
                chainz [ #chainz + 1 ] = curchn
            end
            curchn.fasta = curchn.fasta .. self.fastac [ ii ]
            curchn.ss = curchn.ss .. self.ss [ ii ]
            --
            -- keep stop and len current on every segment, so they are
            -- correct even when no "last" flag was set for the chain
            --
            curchn.stop = ii
            curchn.len = ii - curchn.start + 1
            self.chainid [ ii ] = curchn.chainid
            self.chainpos [ ii ] = curchn.len
        end
        return chainz
    end,
    getLigands = function ( self )
        --
        -- ultra-paranoid method for detecting ligands
        --
        -- each ligand segment is treated separately in this version
        --
        -- returns a list with one entry per segment whose secondary
        -- structure code is "M"; each entry has the fields
        -- seg, atoms, rots, and score
        --
        local ligandz = {}
        for ii = 1, self.segCnt do
            if self.ss [ ii ] == self.LIGAND then
                ligandz [ #ligandz + 1 ] = {
                    seg = ii,
                    atoms = structure.GetAtomCount ( ii ),
                    rots = rotamer.GetCount ( ii ),
                    score = current.GetSegmentEnergyScore ( ii ),
                }
            end
        end
        if self.DEBUG then
            print ( #ligandz .. " ligands" )
            for jj = 1, #ligandz do
                print ( "ligand # "
                    .. jj ..
                    ", segment = "
                    .. ligandz [ jj ].seg ..
                    ", atoms = "
                    .. ligandz [ jj ].atoms ..
                    ", rotamers = "
                    .. ligandz [ jj ].rots ..
                    ", score = "
                    .. self.round ( ligandz [ jj ].score )
                    )
                --
                -- a ligand that is not part of the trailing run of
                -- ligand segments sits below the adjusted count
                --
                if ligandz [ jj ].seg < self.segCnt2 then
                    print ( "WARNING: non-standard ligand at segment "
                        .. ligandz [ jj ].seg ..
                        ", most ligand-aware recipes won't work properly" )
                end
            end
        end
        return ligandz
    end,
    setNfo = function ( self )
        --
        -- setNfo - collect all per-segment info and build the summaries
        --
        -- Reads the segment count, amino acid code, secondary structure,
        -- and neighbor distance of every segment from Foldit, flags the
        -- first and last segment of each chain, then fills
        -- self.chains (see getChains) and self.ligands (see getLigands).
        --
        -- All collected tables are rebuilt from scratch, so calling
        -- setNfo again refreshes the info instead of appending to it.
        --
        self.segCnt = structure.GetCount ()
        self.aalist, self.rnalist, self.dnalist = {}, {}, {}
        self.aa, self.ss, self.acdx, self.ctype = {}, {}, {}, {}
        self.first, self.last = {}, {}
        self.fastac, self.short, self.long = {}, {}, {}
        self.chainid, self.chainpos = {}, {}
        --
        -- partition AminoAcids for display purposes
        -- (sorted, since pairs returns keys in no particular order)
        --
        for key, value in pairs ( self.AminoAcids ) do
            if value.ctype == self.PROTEIN then
                self.aalist [ #self.aalist + 1 ] = key
            elseif value.ctype == self.RNA then
                self.rnalist [ #self.rnalist + 1 ] = key
            elseif value.ctype == self.DNA then
                self.dnalist [ #self.dnalist + 1 ] = key
            end
        end
        table.sort ( self.aalist )
        table.sort ( self.rnalist )
        table.sort ( self.dnalist )
        --
        -- initial scan - retrieve basic info from Foldit and AminoAcids table
        --
        for ii = 1, self.segCnt do
            --
            -- structure.GetAminoAcid can throw an error for some segments,
            -- treat those segments as unknown
            --
            local good, code = pcall ( structure.GetAminoAcid, ii )
            if not good then
                code = "unk"
            end
            self.aa [ ii ] = code
            self.ss [ ii ] = structure.GetSecondaryStructure ( ii )
            --
            -- look it up
            --
            local aatab = self.AminoAcids [ code ]
            if aatab ~= nil then
                self.ctype [ ii ] = aatab.ctype
                --
                -- even the codes 'x' or 'unk' are considered protein
                -- unless the secondary structure is "M"
                --
                -- this handles glycosylated amino acids
                -- in puzzles 879, 1378b, and similar
                --
                -- segment 134 in puzzle 879 is the example,
                -- it's no longer asparagine, but it is part of
                -- the peptide chain
                --
                if self.ss [ ii ] == self.LIGAND then
                    self.ctype [ ii ] = self.LIGAND
                end
            else
                --
                -- special case: unknown code - mark it as ligand
                --
                -- this should not occur, but just in case
                --
                self.ctype [ ii ] = self.LIGAND
                aatab = self.AminoAcids [ self.UNKNOWN_AA ] -- a known unknown
            end
            --
            -- get distance to the next segment,
            -- the last segment gets a value larger than any reference
            --
            if ii < self.segCnt then
                self.acdx [ ii ] = structure.GetDistance ( ii, ii + 1 )
            else
                self.acdx [ ii ] = 10000
            end
            --
            -- save values from amino acids table
            --
            self.short [ ii ] = aatab.short
            self.long [ ii ] = aatab.long
            self.fastac [ ii ] = aatab.code
            self.first [ ii ] = false
            self.last [ ii ] = false
        end -- end of initial scan
        --
        -- standard ligand adjustment: don't count trailing ligand segments
        --
        -- this must follow the initial scan, since it depends on self.ss
        --
        self.segCnt2 = self.segCnt
        while self.segCnt2 > 0 and self.ss [ self.segCnt2 ] == self.LIGAND do
            self.segCnt2 = self.segCnt2 - 1
        end
        if self.segCnt2 == self.segCnt then
            print ( "segment count = " .. self.segCnt )
        else
            print ( "original segment count = " .. self.segCnt )
            print ( "adjusted segment count = " .. self.segCnt2 )
        end
        --
        -- to determine first and last in chain for all types,
        -- based on change in type (control break)
        --
        -- the two tests are independent, so a single-segment chain
        -- at either end of the puzzle gets both flags
        --
        for ii = 1, self.segCnt do
            if ii == 1 or self.ctype [ ii ] ~= self.ctype [ ii - 1 ] then
                self.first [ ii ] = true
            end
            if ii == self.segCnt or self.ctype [ ii ] ~= self.ctype [ ii + 1 ] then
                self.last [ ii ] = true
            end
            --
            -- every ligand segment is a chain of its own
            --
            if self.ctype [ ii ] == self.LIGAND then
                self.first [ ii ] = true
                self.last [ ii ] = true
            end
            if self.first [ ii ] and self.DEBUG then
                print ( "chain start at segment " .. ii .. ", type = " .. self.Ctypes [ self.ctype [ ii ] ] )
            end
            if self.last [ ii ] and self.DEBUG then
                print ( "chain end at segment " .. ii .. ", type = " .. self.Ctypes [ self.ctype [ ii ] ] )
            end
        end
        --
        -- look for chain breaks based on distances
        --
        for ii = 1, self.segCnt do
            local stype = self.ctype [ ii ] -- type of this segment
            local gref = 0                  -- gap reference distance
            if stype == self.PROTEIN then
                gref = self.ACRF
            elseif stype == self.DNA or stype == self.RNA then
                gref = self.PRF
            end
            --
            -- up until last segment: gap to the next segment of same type
            --
            if ii < self.segCnt
            and self.ctype [ ii + 1 ] == stype
            and self.acdx [ ii ] > gref then
                self.last [ ii ] = true
                if self.DEBUG then
                    print ( "chain end at " .. ii .. " due to gap" )
                end
            end
            --
            -- after first segment: gap to the previous segment of same type
            --
            if ii > 1
            and self.ctype [ ii - 1 ] == stype
            and self.acdx [ ii - 1 ] > gref then
                self.first [ ii ] = true
                if self.DEBUG then
                    print ( "chain start at " .. ii .. " due to gap" )
                end
            end
        end
        --
        -- summarize the chain info
        --
        self.chains = self:getChains ()
        --
        -- get the ligand info
        --
        self.ligands = self:getLigands ()
    end,
} -- protNfo--protNfo--protNfo--protNfo--protNfo--protNfo--protNfo
--
-- end of globals section
--
--
-- setChain -- apply an edited sequence to one chain
--
-- chain is a chain table entry with these fields:
--
--   ctype   - chain type, compared against protNfo.DNA and protNfo.RNA
--   chainid - chain id, used in messages
--   start   - first Foldit segment of the chain
--   stop    - last Foldit segment of the chain
--   fasta   - requested sequence, single-letter codes
--   fastab  - sequence before the change, single-letter codes
--
-- Each position where fasta differs from fastab is mutated with
-- structure.SetAminoAcid, provided the new code is known, is not
-- the unknown code "x", and protNfo.mute marks the segment mutable.
-- Positions past the end of fasta are left unchanged.
--
-- On return, chain.fasta and chain.fastab both hold the sequence
-- actually in effect, always one character per segment.
--
-- Returns the number of segments changed and the number of errors.
--
function setChain ( chain )
    local changes = 0
    local errz = 0
    local offset = chain.start - 1
    --
    -- for DNA and RNA, FASTA codes must be converted to Foldit codes
    --
    local codein = nil
    if chain.ctype == protNfo.DNA then
        codein = DNAin
    elseif chain.ctype == protNfo.RNA then
        codein = RNAin
    end
    local fastan = {} -- possibly changed chain, one code per segment
    for ii = 1, chain.stop - offset do
        local seg = ii + offset
        local sExt = chain.fasta:sub ( ii, ii )  -- requested, external code
        local oExt = chain.fastab:sub ( ii, ii ) -- current, external code
        local keep = oExt                        -- code recorded for this segment
        --
        -- an empty sExt means the input was shorter than the chain,
        -- the remaining segments are simply left alone
        --
        if sExt ~= "" and sExt ~= oExt then
            local sType = sExt
            if codein ~= nil then
                sType = codein [ sExt ] or protNfo.UNKNOWN_BASE
            end
            local sName = protNfo.AminoAcids [ sType ]
            local where = "segment " .. seg .. " (" .. chain.chainid .. ":" .. ii .. ")"
            if sName == nil then
                print ( where .. ", skipping invalid type \"" .. sType .. "\"" )
                errz = errz + 1
            elseif sName.code == "x" then -- unknown amino acid
                --
                -- not counted as an error, but the segment still needs
                -- its slot in the result or later positions would shift
                --
                print ( where .. " unknown amino acid code \"" .. sType .. "\" not allowed" )
            elseif protNfo.mute [ seg ] then
                structure.SetAminoAcid ( seg, sType )
                --
                -- read the code back to confirm the mutation took effect
                --
                if structure.GetAminoAcid ( seg ) == sType then
                    changes = changes + 1
                    keep = sName.code
                else
                    print ( where .. " mutation to type \"" .. sType .. "\" failed" )
                    errz = errz + 1
                end
            else
                print ( where .. " is not mutable, skipping change to type \"" .. sType .. "\"" )
                errz = errz + 1
            end
        end
        fastan [ ii ] = keep
    end
    local result = table.concat ( fastan )
    chain.fasta = result
    chain.fastab = result
    return changes, errz
end
--
-- GetParameters -- show the chains in a dialog, one page at a time
--
-- chnz is the list of chain table entries; each entry displayed needs
-- chainid, ctype, start, stop, len, mute, and fasta.
-- chndx is the index of the first chain to display (default 1).
-- peptides, gchn, minseg, maxseg, totlen, and totmut are not used here,
-- they are only handed on when paging.
--
-- Up to CHPAGE chains are shown per page; "Prev" and "Next" call
-- GetParameters again for the neighboring page. Whenever a button
-- other than "Exit" is clicked, the text boxes of the current page are
-- copied back to chain.fasta, converted to lower case and cut to the
-- length of the chain.
--
-- Returns 1 if "Change" was clicked, 0 if "Exit" was clicked
-- (on this page or on a page reached from it).
--
function GetParameters ( chnz, peptides, gchn, minseg, maxseg, totlen, totmut, chndx )
    if chndx == nil then
        chndx = 1
    end
    local CHPAGE = 4 -- chains per dialog page
    local rc = 0
    local dlog = dialog.CreateDialog ( ReVersion )
    local chmax = math.min ( #chnz, chndx + CHPAGE - 1 )
    dlog.sc0 = dialog.AddLabel ( "segment count = " .. structure.GetCount () )
    local cwd = "chains"
    if #chnz == 1 then
        cwd = "chain"
    end
    dlog.chz = dialog.AddLabel ( #chnz .. " " .. cwd )
    if #chnz > CHPAGE then
        dlog.CHDisp = dialog.AddLabel ( "displaying " .. chndx .. " - " .. chmax .. " of " .. #chnz .. " chains" )
    end
    for ii = chndx, chmax do
        local chain = chnz [ ii ]
        dlog [ "chn" .. ii .. "l1" ] = dialog.AddLabel (
            "Chain " .. chain.chainid .. " (" .. protNfo.Ctypes [ chain.ctype ] .. ")"
            )
        local extent = "segments " .. chain.start .. "-" .. chain.stop .. ", length = " .. chain.len
        if chain.mute ~= 0 then
            extent = extent .. ", mutables = " .. chain.mute
        end
        dlog [ "chn" .. ii .. "l2" ] = dialog.AddLabel ( extent )
        dlog [ "chn" .. ii .. "ps" ] = dialog.AddTextbox ( "seq", chain.fasta )
    end
    --
    -- usage hints, cut and paste are only mentioned if changes are possible
    --
    dlog.u0 = dialog.AddLabel ( "" )
    if mutable then
        dlog.u1 = dialog.AddLabel ( "Usage: click in text box, " )
        dlog.u2 = dialog.AddLabel ( "then use select all and copy, cut, or paste" )
        dlog.u3 = dialog.AddLabel ( "to save or change primary structure" )
    else
        dlog.u1 = dialog.AddLabel ( "Usage: click in text box," )
        dlog.u2 = dialog.AddLabel ( "then use select all and copy" )
        dlog.u3 = dialog.AddLabel ( "to save primary structure" )
    end
    dlog.w0 = dialog.AddLabel ( "" )
    dlog.w1 = dialog.AddLabel ( "Windows: ctrl + a = select all" )
    if mutable then
        dlog.w2 = dialog.AddLabel ( "Windows: ctrl + x = cut" )
    end
    dlog.w3 = dialog.AddLabel ( "Windows: ctrl + c = copy" )
    if mutable then
        dlog.w4 = dialog.AddLabel ( "Windows: ctrl + v = paste" )
    end
    dlog.z0 = dialog.AddLabel ( "" )
    --
    -- buttons: 1 = change, 2 = previous page, 3 = next page, 0 = exit
    --
    if mutable then
        dlog.ok = dialog.AddButton ( "Change" , 1 )
    end
    if chndx > 1 then
        dlog.prev = dialog.AddButton ( "Prev", 2 )
    end
    if chmax < #chnz then
        dlog.next = dialog.AddButton ( "Next", 3 )
    end
    dlog.exit = dialog.AddButton ( "Exit" , 0 )
    repeat
        rc = dialog.Show ( dlog )
        if rc >= 1 then
            --
            -- save the edits of this page before changing or paging
            --
            for ii = chndx, chmax do
                local entered = dlog [ "chn" .. ii .. "ps" ].value:lower ()
                chnz [ ii ].fasta = entered:sub ( 1, chnz [ ii ].len )
            end
        end
        if rc == 2 then
            rc = GetParameters ( chnz, peptides, gchn, minseg, maxseg, totlen, totmut, chndx - CHPAGE )
        elseif rc == 3 then
            rc = GetParameters ( chnz, peptides, gchn, minseg, maxseg, totlen, totmut, chndx + CHPAGE )
        end
    until rc < 2
    return rc
end
--
-- main -- recipe entry point
--
-- Collects the protein info, records which segments are mutable,
-- prints a summary of all chains and ligands to the scriptlog,
-- then shows the dialog until "Exit" is clicked. Each time "Change"
-- is clicked, every chain whose sequence was edited is applied
-- with setChain. Ends by calling cleanup.
--
function main ()
    print ( ReVersion )
    print ( "Puzzle: " .. puzzle.GetName () )
    local trk = ui.GetTrackName ()
    if trk ~= "default" then
        print ( "Track: " .. trk )
    end
    undo.SetUndo ( false )
    protNfo:setNfo ()
    protNfo.mute = {} -- protNfo no longer collects mutables by default
    for ii = 1, structure.GetCount () do
        protNfo.mute [ ii ] = structure.IsMutable ( ii )
        if protNfo.mute [ ii ] == true then
            mutable = true
        end
    end
    --
    -- per chain: remember the starting sequence and count the mutables
    --
    local chnTbl = protNfo.chains
    for cc = 1, #chnTbl do
        local chain = chnTbl [ cc ]
        chain.fastab = chain.fasta
        local mutables = 0
        for ii = chain.start, chain.stop do
            if protNfo.mute [ ii ] then
                mutables = mutables + 1
            end
        end
        chain.mute = mutables
    end
    print ( #chnTbl .. " chains and ligands" )
    --
    -- print the chains and make some tests
    --
    local totlen = 0    -- combined length of all non-ligand chains
    local chncnt = 0    -- number of non-ligand chains
    local mutchn = 0    -- number of chains with at least one mutable
    local totmut = 0    -- combined mutable count of all non-ligand chains
    local gchn = ""     -- all non-ligand sequences joined
    local minseg = 99999
    local maxseg = 0
    for ii = 1, #chnTbl do
        local chain = chnTbl [ ii ]
        if chain.ctype ~= "M" then
            local summary = "chain " .. chain.chainid
                .. " (" .. protNfo.Ctypes [ chain.ctype ] .. "), segments "
                .. chain.start .. "-" .. chain.stop
                .. ", length = " .. chain.len
            if chain.mute ~= 0 then
                summary = summary .. ", mutables = " .. chain.mute
            end
            print ( summary )
            print ( chain.fasta )
            gchn = gchn .. chain.fasta
            chncnt = chncnt + 1
            if chain.mute > 0 then
                mutchn = mutchn + 1
            end
            totmut = totmut + chain.mute
            minseg = math.min ( minseg, chain.start )
            maxseg = math.max ( maxseg, chain.stop )
            totlen = totlen + chain.len
        else
            print ( "ligand " .. chain.chainid .. ", segment = " .. chain.start )
        end
    end
    --
    -- assume the worse if average length is under 25
    --
    -- only meaningful for more than one chain, which also keeps
    -- the average from dividing by zero when everything is a ligand
    --
    local peptides = false
    if chncnt > 1 and mutchn == 0 and totlen / chncnt < 25 then
        peptides = true
        print ( "multiple immutable peptides found" )
        print ( "these are likely fragments of a larger protein" )
        print ( "combined sequence:" )
        print ( gchn )
        -- TODO: rewrite the chain table as one combined chain covering
        -- minseg - maxseg; until then the fragments are shown separately
    end
    local changeNum = 0 -- number of chain changes applied so far
    while GetParameters ( chnTbl, peptides, gchn, minseg, maxseg, totlen, totmut, 1 ) > 0 do
        for ii = 1, #chnTbl do
            local chain = chnTbl [ ii ]
            if chain.fasta ~= chain.fastab then
                print ( "--" )
                print ( "chain " .. chain.chainid .. " changed" )
                local old = chain.fastab
                changeNum = changeNum + 1
                --
                -- filters are switched off while mutating
                -- and forced back on afterwards
                --
                behavior.SetFiltersDisabled ( true )
                local sChg, sErr = setChain ( chain )
                behavior.SetFiltersDisabled ( false )
                print ( "segments changed = " .. sChg .. ", errors = " .. sErr )
                print ( "old chain " .. chain.chainid .. ": " )
                print ( old )
                print ( "new chain " .. chain.chainid .. ": " )
                print ( chain.fastab )
            end
        end
    end
    cleanup ()
end
--
-- cleanup -- final report, also used as the xpcall error handler
--
-- errmsg is nil for a normal end. Otherwise it is the error object
-- passed in by xpcall; a message containing "Cancelled" is reported
-- as "cancelled", anything else as "error".
--
-- Filters are always switched back on, and that is done first so that
-- no problem while printing the report can prevent it.
--
function cleanup ( errmsg )
    --
    -- do not loop if cleanup causes an error
    --
    if CLEANUPENTRY ~= nil then
        return
    end
    CLEANUPENTRY = true
    behavior.SetFiltersDisabled ( false )
    print ( "---" )
    local reason = "complete"
    local line = nil
    if errmsg ~= nil then
        --
        -- the error object is not necessarily a string
        --
        errmsg = tostring ( errmsg )
        --
        -- split "chunk:line: message" into line number and message;
        -- messages without that prefix are used as they are
        --
        local _, _, errline, msgpos = errmsg:find ( ":(%d+):%s()" )
        if msgpos ~= nil then
            line = errline
            errmsg = errmsg:sub ( msgpos )
        end
        if errmsg:find ( "Cancelled" ) ~= nil then
            reason = "cancelled"
        else
            reason = "error"
        end
    end
    print ( ReVersion .. " " .. reason )
    print ( "Puzzle: " .. puzzle.GetName () )
    local trk = ui.GetTrackName ()
    if trk ~= "default" then
        print ( "Track: " .. trk )
    end
    if reason == "error" then
        print ( "Unexpected error detected" )
        print ( "Error line: " .. ( line or "unknown" ) )
        print ( "Error: \"" .. errmsg .. "\"" )
    end
end
-- run the recipe; any error raised inside main is handed to cleanup,
-- which acts as the xpcall message handler
xpcall ( main, cleanup )