-- Code
--[[
SelectoPro
Version 1.2 allows selecting segments by segment number, chain, or amino
acid. Segment number input uses the syntax of Timo van der Laan's segment
set logic.
Version 1.3 better implements segment set logic
Version 3.0 handles DNA/RNA.
select segments based on criteria
v1.0 - LociOiling - 2017/10/31
-- select mutable, locked, frozen, hydrophobic, hydrophilic, etc.
v1.1 - LociOiling - 2022/01/23
-- select monomer or complex core, boundary, surface
v1.2 - LociOiling - 2023/05/16
-- group global variables in tables
-- select by chain (detect chains)
-- select by segment number, or segment range
-- select by amino acid
-- cache selection info for performance
-- capture existing selections as set
-- trim down SLT (remove segment type functions)
-- scriptlog output of final selections
-- a ligand is just another segment
v1.3 - LociOiling - 2024/01/25
-- speed things up, work in segment sets instead of one-by-one
-- remove protein design features: mutable, simplex core, and complex core,
no more protein design puzzles, no reference to puzzle objectives now
-- allow selecting *unlocked*
-- split unlocked into backbone, sidechain
-- allow selecting *unfrozen*
-- split frozen into backbone, sidechain
v3.0 - LociOiling - 2025/02/23 - 2025/05/01
-- allow selecting DNA/RNA/protein (by chain)
-- move attributes to a button
-- speed things up for large puzzles
-- revamp protNfo
-- remove "include ligand" flag
-- bump to version 3.0 to match AA Edit, SS Edit
]]--
--
-- globals section
--
Recipe = "SelectoPro"
Version = "3.0"
ReVersion = Recipe .. " " .. Version -- recipe name plus version, used as dialog title
--
-- PST - puzzle state captured by Init
--
PST = {
    sel = {},       -- per-segment flags from selection.IsSelected
    selSet = {},    -- initially selected segments as a segment set
    selCnt = 0,     -- number of initially selected segments (a count, shown in the main dialog)
    hasDNA = false, -- true if DNA chain(s) present
    hasRNA = false, -- true if RNA chain(s) present
    useaa = {},     -- list of AA (and base) codes offered for this puzzle
}
--
-- SPX - user options, loaded from and saved to the dialogs
--
local SPX = {
    sUnlockedBB = false,
    sUnlockedSC = false,
    sUnfrozenBB = false,
    sUnfrozenSC = false,
    sPhobic = false,
    sPhilic = false,
    sChain = false,  -- implied dialog value, select by chain
    sAA = false,     -- implied dialog value, select by amino acid
    sSeg = false,    -- implied dialog value, select by segment/range
    ranges = "",     -- ranges of segments as string
    rtab = {},       -- ranges as a table
    sKeep = false,
    sInvert = false,
}
SLT = { -- SLT--SLT--SLT--SLT--SLT--SLT--SLT--SLT--SLT--SLT--
    --[[
    SLT - Segment set, list, and type module v0.7

    Includes the segment set and list module and the segment type module
    developed by Timo van der Laan.

    The following Foldit recipes contain the original code for these modules:

    * Tvdl enhanced DRW 3.1.1 - https://fold.it/portal/recipe/102840
    * TvdL DRemixW 3.1.2 - https://fold.it/portal/recipe/102398

    The "set and list" module performs logical operations and
    transformations on tables containing ranges of segments.

    The segment type module finds lists and sets of segments with various
    properties, such as selected or frozen. (The segment type functions
    have been removed from this copy for space; the FindUnlocked example
    below only illustrates the set format.)

    A "list" is a one-dimensional table containing segment numbers.
    A "set" is a two-dimensional table containing segment number ranges.

    For example, given a list of segments:

        list = { 1, 2, 3, 7, 8, 11, 13, 14, 15 }

    the corresponding set is:

        set = { { 1, 3 }, { 7, 8 }, { 11, 11 }, { 13, 15 } }

    Most functions assume that the sets are well-formed,
    meaning they are ordered and have no overlaps.

    As an example, the method FindUnlocked returns a set of
    all the unlocked segments in a puzzle. The method can be
    called as follows:

        funlocked = SLT:FindUnlocked ()

    The return value funlocked is a two-dimensional table
    containing ranges of unlocked segments.

    In source format, the table might look like this:

        funlocked = {
            { 27, 35, },
            { 47, 62, },
            { 78, 89, },
        }

    The code to use this table would look like:

        --
        -- for each range of segments
        --
        for ii = 1, #funlocked do
            --
            -- for each segment in the range, do something
            --
            for jj = funlocked [ ii ] [ 1 ], funlocked [ ii ] [ 2 ] do
                ... something ...
            end
        end

    This pseudo-module is a table containing a mix of
    data fields and methods. This wiki article explains
    the packaging technique:

    https://foldit.fandom.com/wiki/Lua_packaging_for_Foldit

    Authorship
    ----------
    Original by Timo van der Laan:
    02-05-2012 TvdL Free to use for non commercial purposes
    French comments by Bruno Kestemont and perhaps others.

    v0.1 - LociOiling
        + extract and reformat code
    v0.2 - LociOiling - 2017/11/03
        + add primary FindUnlocked function
    v0.3 - LociOiling
        + add FindRotamers function
    v0.4 - LociOiling - 2019/10/29
        + package as table
        + remove dependencies on segCnt and segCnt2
    v0.5 - LociOiling - 2019/12/17
        + convert functions to methods, update internal references
    v0.6 - LociOiling - 2022/04/18
        + add FindAAList
    v0.7 - LociOiling - 2023/05/02
        + add FindWorkableList
    ]]--
    --
    -- variables
    --
    segCnt = nil,  -- segment count, not adjusted for ligands
    segCnt2 = nil, -- segment count, not including terminal ligands
    --
    -- initializer - can be called externally, but invoked inline if segCnt or segCnt2 are nil
    --
    Init = function ( self )
        self.segCnt = structure.GetCount ()
        self.segCnt2 = self.segCnt
        -- step back over trailing segments whose secondary structure is "M";
        -- the > 0 guard keeps segment 0 from being queried when every
        -- segment is "M"
        while self.segCnt2 > 0
        and structure.GetSecondaryStructure ( self.segCnt2 ) == "M" do
            self.segCnt2 = self.segCnt2 - 1
        end
    end,
    --
    -- segment set and list functions
    --
    --
    -- SegmentListToSet - convert a list of segments to a set of ranges
    --
    -- The list is sorted in place. Duplicates are removed and consecutive
    -- segment numbers are merged into one range.
    --
    SegmentListToSet = function ( self, list )
        local result = {}
        local ff = 0   -- first segment of the current range
        local ll = -1  -- last segment seen
        table.sort ( list )
        for ii = 1, #list do
            if list [ ii ] ~= ll + 1 and list [ ii ] ~= ll then
                -- neither consecutive nor duplicate: close the current range
                if ll > 0 then
                    result [ #result + 1 ] = { ff, ll }
                end
                ff = list [ ii ]
            end
            ll = list [ ii ]
        end
        if ll > 0 then
            result [ #result + 1 ] = { ff, ll }
        end
        return result
    end,
    --
    -- SegmentSetToList - expand a set of ranges into a list of segments
    --
    SegmentSetToList = function ( self, set )
        local result = {}
        for ii = 1, #set do
            for kk = set [ ii ] [ 1 ], set [ ii ] [ 2 ] do
                result [ #result + 1 ] = kk
            end
        end
        return result
    end,
    --
    -- SegmentCleanSet - make a set well-formed (ordered, merged, no duplicates)
    --
    SegmentCleanSet = function ( self, set )
        return self:SegmentListToSet ( self:SegmentSetToList ( set ) )
    end,
    --
    -- SegmentInvertSet - return the set of all segments in 1..maxseg
    -- that are not in the given well-formed set
    --
    -- maxseg defaults to structure.GetCount ()
    --
    SegmentInvertSet = function ( self, set, maxseg )
        local result = {}
        if maxseg == nil then
            maxseg = structure.GetCount ()
        end
        if #set == 0 then
            return { { 1, maxseg } }
        end
        if set [ 1 ] [ 1 ] ~= 1 then
            result [ 1 ] = { 1, set [ 1 ] [ 1 ] - 1 }
        end
        for ii = 2, #set do
            result [ #result + 1 ] = { set [ ii - 1 ] [ 2 ] + 1, set [ ii ] [ 1 ] - 1, }
        end
        -- "<" rather than "~=" so a set reaching past maxseg
        -- does not produce a reversed range
        if set [ #set ] [ 2 ] < maxseg then
            result [ #result + 1 ] = { set [ #set ] [ 2 ] + 1, maxseg }
        end
        return result
    end,
    --
    -- SegmentInvertList - return the list of segments in 1..segCnt2
    -- that are not in the given list
    --
    -- The list is sorted in place. Segments before the first entry are
    -- included, and an empty list yields every segment.
    --
    SegmentInvertList = function ( self, list )
        if self.segCnt2 == nil then
            self:Init ()
        end
        table.sort ( list )
        local result = {}
        local nxt = 1 -- lowest segment not yet accounted for
        for ii = 1, #list do
            for jj = nxt, list [ ii ] - 1 do
                result [ #result + 1 ] = jj
            end
            if list [ ii ] >= nxt then
                nxt = list [ ii ] + 1
            end
        end
        for jj = nxt, self.segCnt2 do
            result [ #result + 1 ] = jj
        end
        return result
    end,
    --
    -- SegmentInList - true if seg is in the list (list is sorted in place)
    --
    SegmentInList = function ( self, seg, list )
        table.sort ( list )
        for ii = 1, #list do
            if list [ ii ] == seg then
                return true
            elseif list [ ii ] > seg then
                return false -- sorted, so seg cannot appear later
            end
        end
        return false
    end,
    --
    -- SegmentInSet - true if seg falls inside one of the ranges of a
    -- well-formed set
    --
    SegmentInSet = function ( self, set, seg )
        for ii = 1, #set do
            if seg >= set [ ii ] [ 1 ] and seg <= set [ ii ] [ 2 ] then
                return true
            elseif seg < set [ ii ] [ 1 ] then
                return false -- ordered, so later ranges start even higher
            end
        end
        return false
    end,
    --
    -- SegmentJoinList - merge two lists of segments
    --
    -- Note: list1 itself is extended, sorted, and returned (no copy is
    -- made); if list1 is nil, list2 is returned as-is.
    --
    SegmentJoinList = function ( self, list1, list2 )
        local result = list1
        if result == nil then
            return list2
        end
        for ii = 1, #list2 do
            result [ #result + 1 ] = list2 [ ii ]
        end
        table.sort ( result )
        return result
    end,
    --
    -- SegmentJoinSet - union of two sets
    --
    SegmentJoinSet = function ( self, set1, set2 )
        return self:SegmentListToSet (
            self:SegmentJoinList (
                self:SegmentSetToList ( set1 ),
                self:SegmentSetToList ( set2 )
            )
        )
    end,
    --
    -- SegmentCommList - intersection of two lists (both sorted in place)
    --
    SegmentCommList = function ( self, list1, list2 )
        local result = {}
        table.sort ( list1 )
        table.sort ( list2 )
        if #list2 == 0 then
            return result
        end
        local jj = 1
        for ii = 1, #list1 do
            -- advance through list2 until it catches up with list1
            while list2 [ jj ] < list1 [ ii ] do
                jj = jj + 1
                if jj > #list2 then
                    return result
                end
            end
            if list1 [ ii ] == list2 [ jj ] then
                result [ #result + 1 ] = list1 [ ii ]
            end
        end
        return result
    end,
    --
    -- SegmentCommSet - intersection of two sets
    --
    SegmentCommSet = function ( self, set1, set2 )
        return self:SegmentListToSet (
            self:SegmentCommList (
                self:SegmentSetToList ( set1 ),
                self:SegmentSetToList ( set2 )
            )
        )
    end,
    --
    -- SegmentSetMinus - segments in set1 that are not in set2
    --
    SegmentSetMinus = function ( self, set1, set2 )
        return self:SegmentCommSet ( set1, self:SegmentInvertSet ( set2 ) )
    end,
    --
    -- SegmentPrintSet - print a set in "1-3, 7-8" form
    --
    SegmentPrintSet = function ( self, set )
        print ( self:SegmentSetToString ( set ) )
    end,
    --
    -- SegmentSetToString - format a set as "first-last, first-last, ..."
    --
    SegmentSetToString = function ( self, set )
        local line = ""
        for ii = 1, #set do
            if ii ~= 1 then
                line = line .. ", "
            end
            line = line .. set [ ii ] [ 1 ] .. "-" .. set [ ii ] [ 2 ]
        end
        return line
    end,
    --
    -- SegmentSetInSet - true if every range of sub lies within a single
    -- range of set (a nil sub counts as contained)
    --
    SegmentSetInSet = function ( self, set, sub )
        if sub == nil then
            return true
        end
        for ii = 1, #sub do
            if not self:SegmentRangeInSet ( set, sub [ ii ] ) then
                return false
            end
        end
        return true
    end,
    --
    -- SegmentRangeInSet - true if the range { first, last } lies within a
    -- single range of set (a nil or empty range counts as contained)
    --
    SegmentRangeInSet = function ( self, set, range )
        if range == nil or #range == 0 then
            return true
        end
        local bb = range [ 1 ]
        local ee = range [ 2 ]
        for ii = 1, #set do
            if bb >= set [ ii ] [ 1 ] and bb <= set [ ii ] [ 2 ] then
                return ( ee <= set [ ii ] [ 2 ] )
            elseif ee <= set [ ii ] [ 1 ] then
                return false
            end
        end
        return false
    end,
    --
    -- SegmentSetToBool - table with one boolean per segment of the puzzle,
    -- true where the segment is in the set
    --
    SegmentSetToBool = function ( self, set )
        local result = {}
        for ii = 1, structure.GetCount () do
            result [ ii ] = self:SegmentInSet ( set, ii )
        end
        return result
    end,
    --
    -- End of Segment Set module
    --
    --
    -- Module Find Segment Types removed for space
    --
} -- SLT--SLT--SLT--SLT--SLT--SLT--SLT--SLT--SLT--SLT--
--
-- SegmentStringToSet - convert user input to a segment set
--
-- This function is the logical inverse of SLT:SegmentSetToString.
--
-- User input is converted to a table containing ranges of segments.
--
-- User input is a comma-separated list of segment ranges or individual segments.
--
-- For example:
--
-- 12-23,47,65-67,69
--
-- or
--
-- 12,47,60
--
-- Whitespace inside an item is ignored, and a range given backwards
-- (for example 67-65) is swapped into order.
--
-- Segment numbers are validated such that 1 <= segnum <= structure.GetCount
--
-- Any stray characters are considered errors, including an item that
-- contains no segment number at all.
--
-- SegmentStringToSet returns a table containing the segment set, and a separate table
-- containing error messages.
--
-- If there are errors, the segment set table is empty. The error table is empty if there are
-- no errors.
--
function SegmentStringToSet ( range )
    local segCnt = structure.GetCount ()
    local result = {} -- segment set being built
    local errz = {}   -- error messages
    --
    -- chkSeg - validate one segment number and the stray text around it
    --
    -- appends a message to errz and returns false on the first problem found,
    -- returns true if the segment is acceptable
    --
    local function chkSeg ( seg, word, xtra1, xtra2 )
        local msg = nil
        if seg == nil then
            msg = "segment number must be numeric"
        elseif seg <= 0 then
            msg = "segment number must be positive"
        elseif seg > segCnt then
            msg = "segment greater than max " .. segCnt
        elseif xtra1:len () > 0 then
            msg = "extra characters \"" .. xtra1 .. "\""
        elseif xtra2:len () > 0 then
            msg = "extra characters \"" .. xtra2 .. "\""
        end
        if msg ~= nil then
            errz [ #errz + 1 ] = "error in \"" .. word .. "\", " .. msg
            return false
        end
        return true
    end
    --
    -- pass 1 - comma-separated items
    --
    for word in range:gmatch ( "([^,]+)" ) do
        word = word:gsub ( "%s+", "" ) -- remove spaces
        local rgt = {}
        local errsBefore = #errz -- to tell whether this item added errors
        --
        -- pass 2a - hyphen-separated segment range
        --
        -- In string.gmatch, patterns enclosed in parentheses are "captures", which are returned.
        -- Both wanted and unwanted items are captured.
        --
        -- ([^%d]*) captures any non-digits before the first and second numbers (x0 and x2).
        -- (%d+) captures the first and second numbers (r1 and r2).
        -- ([^%-]-) captures anything that's not a hyphen after the first number (x1).
        -- %- is the hyphen itself and is not captured; a missing hyphen means no match.
        -- (.*) captures anything after the second number (x3).
        --
        --   x0  r1  x1  -  x2  r2  x3
        --
        for x0, r1, x1, x2, r2, x3 in word:gmatch ( "([^%d]*)(%d+)([^%-]-)%-([^%d]*)(%d+)(.*)" ) do
            local r1x = tonumber ( r1 )
            if chkSeg ( r1x, word, x0, x1 ) then
                rgt [ #rgt + 1 ] = r1x
            end
            local r2x = tonumber ( r2 )
            if chkSeg ( r2x, word, x2, x3 ) then
                rgt [ #rgt + 1 ] = r2x
            end
        end
        --
        -- pass 2b - single segment if pass 2a found nothing
        --
        -- the captures are similar to pass 2a
        --
        if #rgt == 0 and #errz == errsBefore then
            for x0, r1, x1 in word:gmatch ( "([^%d]*)(%d+)(.*)" ) do
                local r1x = tonumber ( r1 )
                if chkSeg ( r1x, word, x0, x1 ) then
                    rgt [ #rgt + 1 ] = r1x
                end
            end
        end
        --
        -- validate and add to result
        --
        if #errz == errsBefore then
            if #rgt == 0 then
                -- nothing numeric at all; a blank item is skipped, anything else is an error
                if word:len () > 0 then
                    errz [ #errz + 1 ] = "error in \"" .. word .. "\", invalid input"
                end
            else
                if #rgt == 1 then -- double up, making a range of one segment
                    rgt [ 2 ] = rgt [ 1 ]
                end
                if rgt [ 1 ] > rgt [ 2 ] then -- swap if segments out of order
                    rgt [ 1 ], rgt [ 2 ] = rgt [ 2 ], rgt [ 1 ]
                end
                result [ #result + 1 ] = rgt
            end
        end
    end
    --
    -- catch-all, error if nothing matched input
    --
    if #result == 0 and #errz == 0 then
        errz [ #errz + 1 ] = "error in \"" .. range .. "\", invalid input"
    end
    if #errz > 0 then
        result = {}
    end
    return result, errz
end
protNfo = { -- protNfo--protNfo--protNfo--protNfo--protNfo--protNfo--protNfo
    --[[
    protNfo package version 0.7

    protNfo is packaged as a pseudo-class or pseudo-module
    containing a mix of data fields and functions

    all entries must be terminated with a comma to keep Lua happy
    the commas aren't necessary if only function definitions are present

    versions
    --------
    0.3 - add chain detection from AA Edit 2.0
    0.4 - add ligand detection from GetSeCount
    0.4 - merges in the ligand logic from GetSeCount
    0.5 - still a work in progress
    0.6 - integrate AminoAcids table
    0.7 - trim the info collected, remove atom count logic
    ]]--
    --
    -- AminoAcids
    --
    -- names and key properties of all known amino acids and nucleobases
    --
    -- Notes:
    --
    -- * commented entries (at the end) are not in Foldit
    -- * one-letter amino acid code is the table key
    -- * two-letter RNA and DNA nucleotides are also valid
    -- * the fields in this table are now referenced by name
    -- * the "unk" and "x" codes are considered protein, unless the segment is marked as
    --   ligand in the secondary structure ( code "M" )
    -- * acref is atom count mid-chain, used to detect multiple peptide chains
    --
    AminoAcids = {
        a = { code = "a", ctype = "P", acref = 10, short = "Ala", long = "Alanine", hydrop = 1.8 },
        c = { code = "c", ctype = "P", acref = 11, short = "Cys", long = "Cysteine", hydrop = 2.5 },
        d = { code = "d", ctype = "P", acref = 12, short = "Asp", long = "Aspartate", hydrop = -3.5 },
        e = { code = "e", ctype = "P", acref = 15, short = "Glu", long = "Glutamate", hydrop = -3.5 },
        f = { code = "f", ctype = "P", acref = 20, short = "Phe", long = "Phenylalanine", hydrop = 2.8 },
        g = { code = "g", ctype = "P", acref = 7, short = "Gly", long = "Glycine", hydrop = -0.4 },
        h = { code = "h", ctype = "P", acref = 17, short = "His", long = "Histidine", hydrop = -3.2 },
        i = { code = "i", ctype = "P", acref = 19, short = "Ile", long = "Isoleucine", hydrop = 4.5 },
        k = { code = "k", ctype = "P", acref = 22, short = "Lys", long = "Lysine", hydrop = -3.9 },
        l = { code = "l", ctype = "P", acref = 19, short = "Leu", long = "Leucine", hydrop = 3.8 },
        m = { code = "m", ctype = "P", acref = 17, short = "Met", long = "Methionine ", hydrop = 1.9 },
        n = { code = "n", ctype = "P", acref = 14, short = "Asn", long = "Asparagine", hydrop = -3.5 },
        p = { code = "p", ctype = "P", acref = 15, short = "Pro", long = "Proline", hydrop = -1.6 },
        q = { code = "q", ctype = "P", acref = 17, short = "Gln", long = "Glutamine", hydrop = -3.5 },
        r = { code = "r", ctype = "P", acref = 24, short = "Arg", long = "Arginine", hydrop = -4.5 },
        s = { code = "s", ctype = "P", acref = 11, short = "Ser", long = "Serine", hydrop = -0.8 },
        t = { code = "t", ctype = "P", acref = 14, short = "Thr", long = "Threonine", hydrop = -0.7 },
        v = { code = "v", ctype = "P", acref = 16, short = "Val", long = "Valine", hydrop = 4.2 },
        w = { code = "w", ctype = "P", acref = 24, short = "Trp", long = "Tryptophan", hydrop = -0.9 },
        y = { code = "y", ctype = "P", acref = 21, short = "Tyr", long = "Tyrosine", hydrop = -1.3 },
        --
        -- codes for ligands or modified amino acids
        --
        x = { code = "x", ctype = "P", acref = 0, short = "Xaa", long = "Unknown", hydrop = 0 },
        unk = { code = "x", ctype = "P", acref = 0, short = "Xaa", long = "Unknown", hydrop = 0 },
        --
        -- bonus! RNA nucleotides
        --
        ra = { code = "a", ctype = "R", acref = 33, short = "a", long = "Adenine", hydrop = 0, },
        rc = { code = "c", ctype = "R", acref = 31, short = "c", long = "Cytosine", hydrop = 0, },
        rg = { code = "g", ctype = "R", acref = 34, short = "g", long = "Guanine", hydrop = 0, },
        ru = { code = "u", ctype = "R", acref = 30, short = "u", long = "Uracil", hydrop = 0, },
        --
        -- bonus! DNA nucleotides
        --
        da = { code = "a", ctype = "D", acref = 0, short = "a", long = "Adenine", hydrop = 0, },
        dc = { code = "c", ctype = "D", acref = 0, short = "c", long = "Cytosine", hydrop = 0, },
        dg = { code = "g", ctype = "D", acref = 0, short = "g", long = "Guanine", hydrop = 0, },
        dt = { code = "t", ctype = "D", acref = 0, short = "t", long = "Thymine", hydrop = 0, },
        --
        -- dusty attic! musty cellar! jumbled boxroom!
        -- can't bear to part with these treasures
        --
        -- b = { code = "b", ctype = "P", acref = 10, short = "Asx", long = "Asparagine/Aspartic acid", hydrop = 0 },
        -- j = { code = "j", ctype = "P", acref = 10, short = "Xle", long = "Leucine/Isoleucine", hydrop = 0 },
        -- o = { code = "o", ctype = "P", acref = 10, short = "Pyl", long = "Pyrrolysine", hydrop = 0 },
        -- u = { code = "u", ctype = "P", acref = 10, short = "Sec", long = "Selenocysteine", hydrop = 0 },
        -- z = { code = "z", ctype = "P", acref = 10, short = "Glx", long = "Glutamine or glutamic acid", hydrop = 0 } ,
    },
    aalist = {},  -- list of AA codes (keys of AminoAcids with ctype "P")
    rnalist = {}, -- list of RNA codes (keys of AminoAcids with ctype "R")
    dnalist = {}, -- list of DNA codes (keys of AminoAcids with ctype "D")
    Ctypes = {
        P = "protein",
        D = "DNA",
        R = "RNA",
        M = "ligand",
    },
    PROTEIN = "P",
    LIGAND = "M",
    RNA = "R",
    DNA = "D",
    UNKNOWN_AA = "x",
    UNKNOWN_BASE = "xx",
    HELIX = "H",
    SHEET = "E",
    LOOP = "L",    -- loop code; must differ from SHEET
    segCnt = 0,    -- unadjusted segment count
    segCnt2 = 0,   -- segment count adjusted for terminal ligands
    aa = {},       -- amino acid codes
    ss = {},       -- secondary structure codes
    ACRF = 4.0,    -- alpha carbon reference distance (protein)
    PRF = 8.0,     -- phosphorus reference distance (RNA/DNA)
    acdx = {},     -- distance from each segment to the next one
    ctype = {},    -- segment type - P, M, R, D
    first = {},    -- true if segment is first in chain
    last = {},     -- true if segment is last in chain
    fastac = {},   -- external code for FASTA-style output
    short = {},    -- short name
    long = {},     -- long name
    chainid = {},  -- chain id
    chainpos = {}, -- position in chain
    chains = {},   -- summary of chains
    ligands = {},  -- ligand table
    DEBUG = false,
    --
    -- round - truncate to three decimal places (floors toward negative infinity)
    --
    round = function ( ii )
        return ii - ii % 0.001
    end,
    --
    -- get a chain id for chain number ndx
    --
    -- chains 1-26 get "A" through "Z"; later chains get two letters, the
    -- prefix starting at "B" ( 27 is "BA", 676 is "ZZ" ); beyond that the
    -- function returns "!!"
    --
    getchid = function ( ndx )
        local chainid = { "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
            "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z" }
        local chmod = ( ndx - 1 ) % #chainid
        local chid = chainid [ chmod + 1 ]
        local chquo = math.floor ( ( ndx - 1 ) / #chainid )
        if chquo > 0 then
            if chquo + 1 <= #chainid then
                chid = chainid [ chquo + 1 ] .. chid
            else
                chid = "!!"
            end
        end
        return chid
    end,
    getChains = function ( self )
        --
        -- getChains - build a table of the chains found
        --
        -- Most Foldit puzzles contain only a single protein (peptide) chain.
        -- A few puzzles contain ligands, and some puzzles have had two
        -- protein chains. Foldit puzzles may also contain RNA or DNA.
        --
        -- This function relies on the first/last flags computed by setNfo:
        -- a new chain starts at every segment flagged "first", and the
        -- chain's stop and length are recorded at every segment flagged "last".
        --
        -- Each ligand is treated as a chain of its own, with a length of 1.
        --
        -- As a side effect, the per-segment chainid and chainpos tables are filled.
        --
        -- chain table entries
        -- -------------------
        --
        -- ctype - chain type - "P" for protein, "M" for ligand, "R" for RNA, "D" for DNA
        -- fasta - FASTA-format sequence, single-letter codes (does not include FASTA header)
        -- ss - secondary structure codes of the chain's segments, concatenated
        -- start - Foldit segment number of sequence start
        -- stop - Foldit segment number of sequence end
        -- len - length of sequence
        -- chainid - chain id assigned to entry, "A", "B", "C", and so on
        --
        -- For DNA and RNA, fasta contains single-letter codes, so "a" for adenine.
        -- The codes overlap the amino acid codes (for example, "a" for alanine).
        -- The DNA and RNA codes must be converted to the appropriate two-letter codes Foldit
        -- uses internally, for example "ra" for RNA adenine and "da" for DNA adenine.
        --
        local chainz = {}
        local chindx = 0
        local curchn = nil
        for ii = 1, self.segCnt do
            if self.first [ ii ] then
                chindx = chindx + 1
                curchn = {
                    ctype = self.ctype [ ii ],
                    fasta = "",
                    ss = "",
                    start = ii,
                    stop = ii,
                    chainid = self.getchid ( chindx ),
                    len = 0,
                }
                chainz [ chindx ] = curchn
            end
            curchn.fasta = curchn.fasta .. self.fastac [ ii ]
            curchn.ss = curchn.ss .. self.ss [ ii ]
            self.chainid [ ii ] = curchn.chainid
            self.chainpos [ ii ] = ii - curchn.start + 1
            if self.last [ ii ] then
                curchn.stop = ii
                curchn.len = curchn.stop - ( curchn.start - 1 )
            end
        end
        return chainz
    end,
    getLigands = function ( self )
        --
        -- ultra-paranoid method for detecting ligands
        --
        -- each ligand segment (secondary structure "M") is treated
        -- separately in this version
        --
        -- returns a list of { seg, atoms, rots, score } entries
        --
        local ligandz = {}
        for ii = 1, self.segCnt do
            if self.ss [ ii ] == "M" then
                local atoms = structure.GetAtomCount ( ii )
                local rots = rotamer.GetCount ( ii )
                local sscor = current.GetSegmentEnergyScore ( ii )
                ligandz [ #ligandz + 1 ] = { seg = ii, atoms = atoms, rots = rots, score = sscor }
            end
        end
        if self.DEBUG then
            print ( #ligandz .. " ligands" )
            for jj = 1, #ligandz do
                print ( "ligand # "
                    .. jj ..
                    ", segment = "
                    .. ligandz [ jj ].seg ..
                    ", atoms = "
                    .. ligandz [ jj ].atoms ..
                    ", rotamers = "
                    .. ligandz [ jj ].rots ..
                    ", score = "
                    .. self.round ( ligandz [ jj ].score )
                )
                -- a ligand below the adjusted count is not at the end of the puzzle
                if ligandz [ jj ].seg < self.segCnt2 then
                    print ( "WARNING: non-standard ligand at segment "
                        .. ligandz [ jj ].seg ..
                        ", most ligand-aware recipes won't work properly" )
                end
            end
        end
        return ligandz
    end,
    setNfo = function ( self )
        --
        -- setNfo - collect all per-segment and per-chain info for the puzzle
        --
        -- fills the data fields of this table from the Foldit structure API;
        -- all collected tables are reset first, so it can safely be called again
        --
        self.segCnt = structure.GetCount ()
        self.aa, self.ss, self.acdx, self.ctype = {}, {}, {}, {}
        self.first, self.last = {}, {}
        self.fastac, self.short, self.long = {}, {}, {}
        self.chainid, self.chainpos = {}, {}
        self.aalist, self.rnalist, self.dnalist = {}, {}, {}
        --
        -- standard ligand adjustment
        --
        -- the secondary structure is queried directly, since self.ss has not
        -- been filled yet at this point
        --
        self.segCnt2 = self.segCnt
        while self.segCnt2 > 0
        and structure.GetSecondaryStructure ( self.segCnt2 ) == self.LIGAND do
            self.segCnt2 = self.segCnt2 - 1
        end
        if self.segCnt2 == self.segCnt then
            print ( "segment count = " .. self.segCnt )
        else
            print ( "original segment count = " .. self.segCnt )
            print ( "adjusted segment count = " .. self.segCnt2 )
        end
        --
        -- partition AminoAcids for display purposes
        --
        for key, value in pairs ( self.AminoAcids ) do
            if value.ctype == self.PROTEIN then
                self.aalist [ #self.aalist + 1 ] = key
            elseif value.ctype == self.RNA then
                self.rnalist [ #self.rnalist + 1 ] = key
            elseif value.ctype == self.DNA then
                self.dnalist [ #self.dnalist + 1 ] = key
            end
        end
        -- pairs () visits keys in no particular order; sort for a stable display
        table.sort ( self.aalist )
        table.sort ( self.rnalist )
        table.sort ( self.dnalist )
        --
        -- initial scan - retrieve basic info from Foldit and AminoAcids table
        --
        for ii = 1, self.segCnt do
            self.aa [ ii ] = structure.GetAminoAcid ( ii )
            self.ss [ ii ] = structure.GetSecondaryStructure ( ii )
            --
            -- look it up
            --
            local aatab = self.AminoAcids [ self.aa [ ii ] ]
            if aatab ~= nil then
                --
                -- even the codes 'x' or 'unk' are considered protein
                -- unless the secondary structure is "M"
                --
                -- this handles glycosylated amino acids
                -- in puzzles 879, 1378b, and similar
                --
                -- segment 134 in puzzle 879 is the example,
                -- it's no longer asparagine, but it is part of
                -- the peptide chain
                --
                if self.ss [ ii ] == self.LIGAND then
                    self.ctype [ ii ] = self.LIGAND
                else
                    self.ctype [ ii ] = aatab.ctype
                end
            else
                --
                -- special case: unknown code - mark it as ligand
                --
                -- this should not occur, but just in case
                --
                self.ctype [ ii ] = self.LIGAND
                aatab = self.AminoAcids [ self.UNKNOWN_AA ] -- a known unknown
            end
            --
            -- get distance to the next segment; the last segment gets a
            -- large dummy value
            --
            if ii < self.segCnt then
                self.acdx [ ii ] = structure.GetDistance ( ii, ii + 1 )
            else
                self.acdx [ ii ] = 10000
            end
            --
            -- save values from amino acids table
            --
            self.short [ ii ] = aatab.short
            self.long [ ii ] = aatab.long
            self.fastac [ ii ] = aatab.code
            self.first [ ii ] = false
            self.last [ ii ] = false
        end -- end of initial scan
        --
        -- determine first and last in chain for all types,
        -- based on change in type (control break)
        --
        -- the puzzle's first segment always starts a chain and its last
        -- segment always ends one; both checks apply to every segment, so a
        -- type change next to either end of the puzzle is detected as well
        --
        for ii = 1, self.segCnt do
            if ii == 1 or self.ctype [ ii ] ~= self.ctype [ ii - 1 ] then
                self.first [ ii ] = true
            end
            if ii == self.segCnt or self.ctype [ ii ] ~= self.ctype [ ii + 1 ] then
                self.last [ ii ] = true
            end
            -- each ligand is a chain of its own
            if self.ctype [ ii ] == self.LIGAND then
                self.first [ ii ] = true
                self.last [ ii ] = true
            end
            if self.first [ ii ] and self.DEBUG then
                print ( "chain start at segment " .. ii .. ", type = " .. self.Ctypes [ self.ctype [ ii ] ] )
            end
            if self.last [ ii ] and self.DEBUG then
                print ( "chain end at segment " .. ii .. ", type = " .. self.Ctypes [ self.ctype [ ii ] ] )
            end
        end
        --
        -- look for chain breaks based on distances
        --
        for ii = 1, self.segCnt do
            local stype = self.ctype [ ii ] -- type of this segment
            local gref = 0 -- gap reference distance
            if stype == self.PROTEIN then
                gref = self.ACRF
            elseif stype == self.DNA then
                gref = self.PRF
            elseif stype == self.RNA then
                gref = self.PRF
            end
            --
            -- up until last segment: a gap to a same-type neighbor ends the chain here
            --
            if ii < self.segCnt
            and self.ctype [ ii + 1 ] == stype
            and self.acdx [ ii ] > gref then
                self.last [ ii ] = true
                if self.DEBUG then
                    print ( "chain end at " .. ii .. " due to gap" )
                end
            end
            --
            -- after first segment: a gap from a same-type neighbor starts a chain here
            --
            if ii > 1
            and self.ctype [ ii - 1 ] == stype
            and self.acdx [ ii - 1 ] > gref then
                self.first [ ii ] = true
                if self.DEBUG then
                    print ( "chain start at " .. ii .. " due to gap" )
                end
            end
        end
        --
        -- summarize the chain info
        --
        self.chains = self:getChains ()
        --
        -- get the ligand info
        --
        self.ligands = self:getLigands ()
    end,
} -- protNfo--protNfo--protNfo--protNfo--protNfo--protNfo--protNfo
--
-- GetParameters - main dialog
--
-- Shows the main dialog repeatedly until the user presses OK with valid
-- input, or presses Cancel. The "Chains", "AAs", and "Attrs" buttons open
-- the corresponding sub-dialogs and then redisplay the main dialog.
--
-- Any button except Cancel saves the dialog values to SPX; the segment
-- ranges text is parsed into SPX.rtab, and parse errors are displayed on
-- the next pass.
--
-- Returns true if the user pressed OK, false if the user cancelled.
-- (A numeric 0 would count as true in a Lua condition, so a boolean is
-- returned for callers that test "if not GetParameters ()".)
--
function GetParameters ()
    local uerror = {} -- error messages to show on the next pass
    local rc
    repeat
        local d = dialog.CreateDialog ( ReVersion )
        d.selcnt = dialog.AddLabel ( PST.selCnt .. " segments selected initially" )
        d.ranges = dialog.AddTextbox ( "Selections", SPX.ranges )
        d.segmsg1 = dialog.AddLabel ( "Selections as segment ranges or single segments, " )
        d.segmsg2 = dialog.AddLabel ( "specified as a comma-separated list, " )
        d.segmsg3 = dialog.AddLabel ( "for example: 1-20,17,21,35-59" )
        d.s0 = dialog.AddLabel ( "" )
        d.l5 = dialog.AddLabel ( "" )
        d.sKeep = dialog.AddCheckbox ( "Keep existing selections?", SPX.sKeep )
        d.sInvert = dialog.AddCheckbox ( "Invert new selections?", SPX.sInvert )
        if #uerror > 0 then
            d.lerr = dialog.AddLabel ( "" )
            for ii = 1, #uerror do
                d [ "lerr" .. ii ] = dialog.AddLabel ( uerror [ ii ] )
            end
        end
        d.ok = dialog.AddButton ( "OK" , 1 )
        if #protNfo.chains > 1 then
            d.chains = dialog.AddButton ( "Chains" , 2 )
        end
        if PST.hasDNA or PST.hasRNA then
            -- plain "/": the "\/" escape is rejected by Lua 5.2 and later
            d.AAs = dialog.AddButton ( "AAs/Bases", 3 )
        else
            d.AAs = dialog.AddButton ( "AAs", 3 )
        end
        d.attrs = dialog.AddButton ( "Attrs", 4 )
        d.cancel = dialog.AddButton ( "Cancel" , 0 )
        rc = dialog.Show ( d )
        uerror = {}
        if rc == 2 then
            GetChains ( 1 )
        elseif rc == 3 then
            GetAAs ( 1 )
        elseif rc == 4 then
            GetAttrs ()
        end
        if rc > 0 then
            SPX.sKeep = d.sKeep.value
            SPX.sInvert = d.sInvert.value
            SPX.ranges = d.ranges.value
            if SPX.ranges:len () > 0 then
                local terror
                SPX.rtab, terror = SegmentStringToSet ( SPX.ranges )
                if #terror == 0 then
                    SPX.rtab = SLT:SegmentCleanSet ( SPX.rtab )
                else
                    for ii = 1, #terror do
                        uerror [ #uerror + 1 ] = terror [ ii ]
                    end
                end
            else
                -- textbox was cleared: drop ranges parsed on an earlier pass
                SPX.rtab = {}
            end
        end
    until rc <= 1 and #uerror == 0
    return rc > 0
end
--
-- GetAttrs - dialog for the miscellaneous segment attributes
--
-- Presents one checkbox per attribute option in SPX. On OK the checkbox
-- values are written back to SPX; on Cancel SPX is left untouched.
--
-- Returns the code of the button pressed: 1 for OK, 0 for Cancel.
--
function GetAttrs ()
    -- SPX field name and checkbox caption, in display order
    local boxes = {
        { "sUnlockedBB", "Unlocked backbone?" },
        { "sUnlockedSC", "Unlocked sidechain?" },
        { "sUnfrozenBB", "Unfrozen backbone?" },
        { "sUnfrozenSC", "Unfrozen sidechain?" },
        { "sPhobic", "Hydrophobic?" },
        { "sPhilic", "Hydrophilic?" },
    }
    local pressed
    repeat
        local dlg = dialog.CreateDialog ( ReVersion .. " misc. attributes" )
        dlg.l0 = dialog.AddLabel ( "Select segments if..." )
        for _, box in ipairs ( boxes ) do
            dlg [ box [ 1 ] ] = dialog.AddCheckbox ( box [ 2 ], SPX [ box [ 1 ] ] )
        end
        dlg.l5 = dialog.AddLabel ( "" )
        dlg.ok = dialog.AddButton ( "OK" , 1 )
        dlg.cancel = dialog.AddButton ( "Cancel" , 0 )
        pressed = dialog.Show ( dlg )
        if pressed > 0 then
            for _, box in ipairs ( boxes ) do
                SPX [ box [ 1 ] ] = dlg [ box [ 1 ] ].value
            end
        end
    until pressed <= 1
    return pressed
end
--
-- GetChains - paged dialog for selecting chains
--
-- Shows up to eight chains from protNfo.chains, starting at index chndx
-- (default 1), one checkbox per chain. Any button except Cancel saves the
-- checkbox values of the current page to the "sel" field of each chain.
-- "Prev" and "Next" open the neighboring page through a recursive call.
--
-- Returns 1 if the dialog chain ended with OK, 0 if it ended with Cancel.
--
function GetChains ( chndx )
    local CHPAGE = 8 -- chains per page
    local first = chndx
    if first == nil then
        first = 1
    end
    local total = #protNfo.chains
    local last = math.min ( total, first + CHPAGE - 1 )
    -- name of the dialog field holding the checkbox for chain n
    local function boxKey ( n )
        return "chn" .. n .. "l1"
    end
    local dlg = dialog.CreateDialog ( ReVersion .. " Chains" )
    dlg.l1 = dialog.AddLabel ( "Displaying chains " .. first .. "-" .. last .. " of " .. total )
    for n = first, last do
        local chain = protNfo.chains [ n ]
        local caption = "Chain " .. chain.chainid
            .. " (" .. protNfo.Ctypes [ chain.ctype ] .. "): "
            .. chain.start .. "-" .. chain.stop
            .. ", length = " .. chain.len
        dlg [ boxKey ( n ) ] = dialog.AddCheckbox ( caption, chain.sel )
    end
    dlg.ok = dialog.AddButton ( "OK" , 1 )
    if first > 1 then
        dlg.prev = dialog.AddButton ( "Prev", 2 )
    end
    if last < total then
        dlg.next = dialog.AddButton ( "Next", 3 )
    end
    dlg.cancel = dialog.AddButton ( "Cancel" , 0 )
    local pressed = 0
    repeat
        pressed = dialog.Show ( dlg )
        if pressed > 0 then
            -- save this page before leaving it
            for n = first, last do
                protNfo.chains [ n ].sel = dlg [ boxKey ( n ) ].value
            end
        end
        if pressed == 2 then
            pressed = GetChains ( first - CHPAGE )
        end
        if pressed == 3 then
            pressed = GetChains ( first + CHPAGE )
        end
    until pressed < 2
    return pressed
end
--
-- GetAAs - paged dialog for selecting amino acids (and bases)
--
-- Shows up to ten codes from PST.useaa, starting at index aaindx
-- (default 1), one checkbox per code. Any button except Cancel saves the
-- checkbox values of the current page to the "sel" field of the matching
-- protNfo.AminoAcids entry. "Prev" and "Next" open the neighboring page
-- through a recursive call.
--
-- Returns 1 if the dialog chain ended with OK, 0 if it ended with Cancel.
--
function GetAAs ( aaindx )
    if aaindx == nil then
        aaindx = 1
    end
    local AAMPAGE = 10 -- amino acids / page
    local rc = 0
    local dname = "AAs"
    if PST.hasDNA or PST.hasRNA then
        -- plain "/": the "\/" escape is rejected by Lua 5.2 and later
        dname = dname .. "/Bases"
    end
    local ask = dialog.CreateDialog ( ReVersion .. " " .. dname )
    local aamax = math.min ( #PST.useaa, aaindx + AAMPAGE - 1 )
    ask.AADisp = dialog.AddLabel ( "displaying " .. aaindx .. " - " .. aamax .. " of " .. #PST.useaa )
    --
    -- checkboxes are named AASEL1, AASEL2, ... by position on the page
    --
    for ii = aaindx, aamax do
        local aawork = PST.useaa [ ii ]
        local aanfo = protNfo.AminoAcids [ aawork ]
        local aalabel = string.upper ( aawork ) .. " (" .. aanfo.short .. ") - " .. aanfo.long
        ask [ "AASEL" .. ( ii - aaindx + 1 ) ] = dialog.AddCheckbox ( aalabel, aanfo.sel )
    end
    ask.OK = dialog.AddButton ( "OK", 1 )
    if aaindx > 1 then
        ask.prev = dialog.AddButton ( "Prev", 2 )
    end
    if aamax < #PST.useaa then
        ask.next = dialog.AddButton ( "Next", 3 )
    end
    ask.Cancel = dialog.AddButton ( "Cancel", 0 )
    repeat
        rc = dialog.Show ( ask )
        if rc > 0 then
            -- save this page before leaving it
            for ii = aaindx, aamax do
                local aanfo = protNfo.AminoAcids [ PST.useaa [ ii ] ]
                aanfo.sel = ask [ "AASEL" .. ( ii - aaindx + 1 ) ].value
            end
        end
        if rc == 2 then
            rc = GetAAs ( aaindx - AAMPAGE )
        end
        if rc == 3 then
            rc = GetAAs ( aaindx + AAMPAGE )
        end
    until rc < 2
    return rc
end
--
-- Init - one-time setup before any dialog is shown
--
-- * calls protNfo:setNfo () to load the protein info
-- * records the current selection state of every segment in PST.sel,
--   the number of selected segments in PST.selCnt, and the selected
--   segments as a segment set in PST.selSet
-- * turns on SPX.sKeep if anything is already selected, and seeds
--   SPX.ranges with the existing selections
-- * prints a summary of the chains and sets PST.hasRNA / PST.hasDNA
--   from the chain types "R" and "D"
-- * builds PST.useaa from protNfo.aalist, followed by protNfo.rnalist
--   and protNfo.dnalist when RNA or DNA chains are present
--
function Init ()
    --
    -- append every entry of sequence src to sequence dst
    --
    local function appendAll ( dst, src )
        for ii = 1, #src do
            dst [ #dst + 1 ] = src [ ii ]
        end
    end

    protNfo:setNfo () -- get complete protein info
    local tsel = {}
    for ii = 1, protNfo.segCnt do
        PST.sel [ #PST.sel + 1 ] = selection.IsSelected ( ii )
        if PST.sel [ #PST.sel ] then
            tsel [ #tsel + 1 ] = ii
        end
    end
    PST.selCnt = #tsel
    if PST.selCnt > 0 then
        SPX.sKeep = true
    end
    PST.selSet = SLT:SegmentListToSet ( tsel )
    SPX.ranges = SLT:SegmentSetToString ( PST.selSet ) -- "ranges" starts with existing selections
    if #protNfo.chains == 1 then
        print ( "single chain" )
    else
        print ( #protNfo.chains .. " chains found" )
    end
    for ii = 1, #protNfo.chains do
        local chain = protNfo.chains [ ii ]
        print ( "chain " .. chain.chainid ..
            " (" .. protNfo.Ctypes [ chain.ctype ] ..
            "), segments " .. chain.start .. "-" .. chain.stop .. ", length = " .. chain.len )
        if chain.ctype == "R" then
            PST.hasRNA = true
        end
        if chain.ctype == "D" then
            PST.hasDNA = true
        end
    end
    --
    -- each list is copied using its own length; the base lists
    -- need not be the same size as the amino acid list
    --
    appendAll ( PST.useaa, protNfo.aalist )
    if PST.hasRNA then
        appendAll ( PST.useaa, protNfo.rnalist )
    end
    if PST.hasDNA then
        appendAll ( PST.useaa, protNfo.dnalist )
    end
    print ( "--" )
end
--
-- chainCheck - is a segment part of a selected chain?
--
-- seg - segment number
--
-- returns true if some entry of protNfo.chains has "sel" set and
-- seg lies within its start..stop range (inclusive), otherwise false
--
function chainCheck ( seg )
    local chains = protNfo.chains
    for idx = 1, #chains do
        local chain = chains [ idx ]
        if chain.sel and chain.start <= seg and chain.stop >= seg then
            return true
        end
    end
    return false
end
--
-- aaCheck - is a segment one of the selected amino acids?
--
-- seg - segment number
--
-- looks up the code in protNfo.aa [ seg ] in protNfo.AminoAcids;
-- returns true if an entry exists and its "sel" is set, otherwise false
--
function aaCheck ( seg )
    local entry = protNfo.AminoAcids [ protNfo.aa [ seg ] ]
    if entry == nil then
        return false
    end
    if entry.sel then
        return true
    end
    return false
end
--
-- main - recipe entry point
--
-- 1. prints the recipe, puzzle, and track names
-- 2. calls Init, then GetParameters; returns without doing
--    anything else if GetParameters returns a false value
-- 3. logs the chosen options, setting the implied flags
--    SPX.sChain, SPX.sAA, and SPX.sSeg along the way
-- 4. clears existing selections unless SPX.sKeep is set
-- 5. tests every segment against each active criterion; a segment
--    is chosen if any criterion accepts it. With SPX.sInvert set,
--    each criterion is reversed individually before they are combined.
-- 6. selects the chosen segments range by range, then reports
--    how many were added and how many could not be selected
-- 7. calls cleanup
--
function main ()
    print ( ReVersion )
    print ( "Puzzle: " .. puzzle.GetName () )
    local trk = ui.GetTrackName ()
    if trk ~= "default" then
        print ( "Track: " .. trk )
    end
    Init ()
    if not GetParameters () then
        return
    end
    print ( "options:" )
    if SPX.sUnlockedBB then
        print ( "select unlocked backbone" )
    end
    if SPX.sUnlockedSC then
        print ( "select unlocked sidechain" )
    end
    if SPX.sUnfrozenBB then
        print ( "select unfrozen backbone" )
    end
    if SPX.sUnfrozenSC then
        print ( "select unfrozen sidechain" )
    end
    if SPX.sPhobic then
        print ( "select hydrophobics" )
    end
    if SPX.sPhilic then
        print ( "select hydrophilics" )
    end
    if #protNfo.chains > 1 then
        local chsels = 0
        local chainSel = ""
        for ii = 1, #protNfo.chains do
            if protNfo.chains [ ii ].sel then
                chsels = chsels + 1
                if chsels > 1 then
                    chainSel = chainSel .. ", "
                end
                chainSel = chainSel .. protNfo.chains [ ii ].chainid
            end
        end
        if chsels > 0 then
            SPX.sChain = true
            local cwd = "chain"
            if chsels > 1 then
                cwd = "chains"
            end
            print ( "select " .. chsels .. " " .. cwd .. " (" .. chainSel .. ")" )
        end
    end
    local aasel = 0
    local aaStr = ""
    for key, aainfo in pairs ( protNfo.AminoAcids ) do
        if aainfo.sel then
            aasel = aasel + 1
            if aasel > 1 then
                aaStr = aaStr .. ", "
            end
            aaStr = aaStr .. key
        end
    end
    if aasel > 0 then
        SPX.sAA = true
        local aawd = "amino acid"
        if aasel > 1 then
            aawd = "amino acids"
        end
        print ( "select " .. aasel .. " " .. aawd .. " (" .. aaStr .. ")" )
    end
    if #SPX.rtab > 0 then
        SPX.sSeg = true
        print ( "select segments " .. SLT:SegmentSetToString ( SPX.rtab ) )
    end
    if SPX.sKeep then
        print ( "keep existing selections" )
    end
    if SPX.sInvert then
        print ( "invert new selections" )
    end
    print ( "--" )
    --
    -- do it
    --
    local sels = 0
    if not SPX.sKeep then
        selection.DeselectAll ()
        for ii = 1, protNfo.segCnt do
            PST.sel [ ii ] = false
        end
        print ( "existing selections cleared" )
    else
        for ii = 1, protNfo.segCnt do
            if selection.IsSelected ( ii ) then
                sels = sels + 1
            end
        end
        print ( sels .. " existing selections" )
    end
    --
    -- wanted: a criterion accepts a segment when the segment matches
    -- and invert is off, or does not match and invert is on
    --
    local invert = SPX.sInvert and true or false
    local function wanted ( match )
        return ( match and true or false ) ~= invert
    end
    local fsels = {} -- fsels is a list of the segments to be selected
    for ii = 1, protNfo.segCnt do
        local sellit = false
        if SPX.sUnlockedBB or SPX.sUnlockedSC then
            local lbb, lsc = structure.IsLocked ( ii )
            if SPX.sUnlockedBB and wanted ( not lbb ) then
                sellit = true
            end
            if SPX.sUnlockedSC and wanted ( not lsc ) then
                sellit = true
            end
        end
        if SPX.sUnfrozenBB or SPX.sUnfrozenSC then
            local fbb, fsc = freeze.IsFrozen ( ii )
            if SPX.sUnfrozenBB and wanted ( not fbb ) then
                sellit = true
            end
            if SPX.sUnfrozenSC and wanted ( not fsc ) then
                sellit = true
            end
        end
        if SPX.sPhobic or SPX.sPhilic then
            local phobic = structure.IsHydrophobic ( ii )
            if SPX.sPhobic and wanted ( phobic ) then
                sellit = true
            end
            if SPX.sPhilic and wanted ( not phobic ) then
                sellit = true
            end
        end
        if SPX.sChain and wanted ( chainCheck ( ii ) ) then
            sellit = true
        end
        if SPX.sAA and wanted ( aaCheck ( ii ) ) then
            sellit = true
        end
        if SPX.sSeg and wanted ( SLT:SegmentInSet ( SPX.rtab, ii ) ) then
            sellit = true
        end
        if sellit then
            fsels [ #fsels + 1 ] = ii
        end
    end
    local iranges = SLT:SegmentSetToString ( PST.selSet )
    if iranges:len () == 0 then
        iranges = "none"
    end
    print ( "initial selections: " .. iranges )
    --
    -- do the actual selecting here using ranges
    --
    local frangez = SLT:SegmentListToSet ( fsels )
    local franges = SLT:SegmentSetToString ( frangez )
    if franges:len () == 0 then
        franges = "none"
    end
    print ( "final selections: " .. franges )
    for ii = 1, #frangez do
        selection.SelectRange ( frangez [ ii ] [ 1 ], frangez [ ii ] [ 2 ] )
    end
    --
    -- determine number of changed segments:
    -- selected now, but not in the state recorded in PST.sel
    --
    local cnt = 0
    for ii = 1, protNfo.segCnt do
        if selection.IsSelected ( ii ) and not PST.sel [ ii ] then
            cnt = cnt + 1
        end
    end
    --
    -- determine number of errors:
    -- segments that were requested but did not end up selected
    --
    local errs = 0
    for ii = 1, #fsels do
        if not selection.IsSelected ( fsels [ ii ] ) then
            errs = errs + 1
        end
    end
    print ( cnt .. " selections added" )
    if errs > 0 then
        print ( errs .. " selection errors (locked segments, etc.) " )
    end
    print ( ( sels + cnt ) .. " total segments selected" )
    cleanup ()
end
--
-- cleanup - final report, also used as the xpcall error handler
--
-- errmsg - nil for a normal finish, otherwise the error value
--
-- Runs at most once: the global CLEANUPENTRY is set on first entry
-- and any later call returns immediately.
--
-- Prints the recipe name with the reason ("complete", "cancelled",
-- or "error"), the puzzle name, and the track name if it isn't
-- "default". For errors, also prints the line number and message.
-- An error message containing "Cancelled" is reported as "cancelled".
--
function cleanup ( errmsg )
    if CLEANUPENTRY ~= nil then
        return
    end
    CLEANUPENTRY = true
    print ( "---" )
    local reason
    local line
    if errmsg == nil then
        reason = "complete"
    else
        --
        -- civilized error reporting,
        -- thanks to Bruno K. and Jean-Bob
        --
        errmsg = tostring ( errmsg ) -- error values are not always strings
        --
        -- look for the ":<line>: " position prefix; the empty capture
        -- gives the index where the message text itself starts
        --
        local _, _, lineNo, msgStart = errmsg:find ( ":(%d+):%s()" )
        line = lineNo
        if msgStart ~= nil then
            errmsg = errmsg:sub ( msgStart )
        end
        if errmsg:find ( "Cancelled" ) ~= nil then
            reason = "cancelled"
        else
            reason = "error"
        end
    end
    print ( ReVersion .. " " .. reason )
    print ( "Puzzle: " .. puzzle.GetName () )
    local trk = ui.GetTrackName ()
    if trk ~= "default" then
        print ( "Track: " .. trk )
    end
    if reason == "error" then
        print ( "Unexpected error detected" )
        -- a message without a position prefix has no line number
        print ( "Error line: " .. ( line or "unknown" ) )
        print ( "Error: \"" .. errmsg .. "\"" )
    end
end
--
-- run the recipe: main is called in protected mode, with cleanup
-- as the handler that receives the error message if main fails
--
xpcall ( main , cleanup )