| Bioinformatics Consulting | Bioinformatics email | Phone | Personal Home Page | Personal email | 
Once all these libraries are compiled and installed, it is possible to start compiling the JESAM tools. Note that depending upon your compilation choices for the three libraries (static/dynamic) you may need to edit both your PATH, and LD_LIBRARY_PATH or LD_LIBRARYN32_PATH environment variables before being able to compile or use the JESAM tools. PVM also requires access to two or three main environment variables including $PVM_ROOT, and $PVM_ARCH which must be specified during PVM library compilation.
|  |  | 
| source ~user/JESAM/JESAM_CSHRC | Set environment variables | 
| pvm | Set the PVM daemon running (you can type "quit" to get your console back) | 
| jesam -anti-sense y -idbases 5 -disbases -15 -gapstart -15 -gapcont -15 -index index.new -cache 2 -slavehost remote -slaves 4 -seqfiles all.sequences | Do pairwise comparisons and store the results | 
| jesamaligndumper index.new | Take a quick peek at summaries of the pairwise alignments | 
| alignserver index.new alignmentDB > & /dev/null & | Start up a CORBA alignment server | 
| Command line parameter | Meaning | 
| -anti-sense [y/n] | Compare sequences in reversed and complemented orientation: yes or no | 
| -idbases n | Score n for two matching bases (n is an integer) | 
| -disbases n | penalty n for two non-aligned, non-n bases | 
| -gapstart n | gapstart penalty | 
| -gapcont n | length-dependent gap penalty | 
| -index name | name is the name of a directory to append to or create to hold the alignment database files | 
| -cache n | Berkeley DB cache sizing control. Values from 1 to 5 are normal for 100,000 sequences and 500MB RAM server increasing to 10 for >1GB server and >200,000 sequences | 
| -slavehost remote | force slaves to spawn off the starting host. Omit this option and the default action is to spawn slaves locally | 
| -slaves n | Number of slaves to start | 
| -seqfiles file1 file2 | list of names of sequence files to be added to alignment database | 
|  |  | 
| java embl.ebi.est.alignment.AlignmentBrowser file:$PWD/alignmentDB.ior | Test the alignment server and browse for problems. Ask for sequence with internal ID 0 | 
| ( nameserv --ior > naming.ior ) > & /dev/null & or the Java version below ( java com.ooc.CosNaming.Server -i -OAport 10000 > naming.ior ) >& /dev/null | Start a naming service if one is not running already. With ORBacus it is possible to define a particular port for either the Java, or C naming service to assist persistence | 
| java -mx100M embl.ebi.est.cluster.SuperClusterServer file:$PWD/naming.ior file:$PWD/alignmentDB.ior clusterDB.ior clusterdb human >& /dev/null & | Start up the supercluster server which will check for the addition of new sequence alignments and publish the alignment server and cluster service with the naming service. | 
| java -mx100M embl.ebi.est.alignment.PickerApplet file:$PWD/naming.ior | Start up a browser for all species, alignments, and clusters | 
# Environment needed to compile and run the JESAM tools (csh syntax, to be
# read with "source").
# Author Jeremy Parsons
# Date 29th January 1999
#
# Search-path variables that are extended below get a starting value of "."
# when the caller has not set them, so the ${...} references cannot fail.
if (! $?LD_LIBRARY_PATH) then
        setenv LD_LIBRARY_PATH "."
endif
if (! $?CLASSPATH) then
        setenv CLASSPATH "."
endif
# Root directory under which PVM, ORBacus and the application jars live
setenv PROGS_DIR /home/radon/EST/progs
# PVM: installation root, daemon location and architecture name; the PVM
# lib and bin directories for that architecture go at the end of PATH.
setenv PVM_ROOT ${PROGS_DIR}/pvm3
setenv PVM_DPATH ${PVM_ROOT}/lib/pvmd
setenv PVM_ARCH SUN4SOL2
setenv PATH ${PATH}:${PVM_ROOT}/lib/${PVM_ARCH}:${PVM_ROOT}/bin/${PVM_ARCH}
# ORBacus: the Java jars (JOB) are put in front of CLASSPATH, the native
# build (OB) in front of PATH and LD_LIBRARY_PATH.
setenv JOB_HOME_DIR ${PROGS_DIR}/JOB.latest
setenv OB_HOME_DIR ${PROGS_DIR}/OB.latest
setenv CLASSPATH ${JOB_HOME_DIR}/lib/OBEvent.jar:${JOB_HOME_DIR}/lib/OBNaming.jar:${JOB_HOME_DIR}/lib/OBProperty.jar:${JOB_HOME_DIR}/lib/OB.jar:${CLASSPATH}
setenv PATH ${OB_HOME_DIR}/bin:${PATH}
setenv LD_LIBRARY_PATH ${OB_HOME_DIR}/lib:${LD_LIBRARY_PATH}
# Jar needed for the clustering browser; it ends up first on CLASSPATH
setenv CLASSPATH ${PROGS_DIR}/jars/PickerBrowser.jar:${CLASSPATH}
# The application binaries are searched last
setenv PATH ${PATH}:${PROGS_DIR}/bin
#!/bin/sh
# Author Jeremy D. Parsons
# November 1998
# December 1998 Changed to allow specification of embl or emblnew
# ===============================================================
# A script that automates some of the sequence retrieval steps needed to
# 1) get seqs from SRS by species, molecule and date
# 2) reformat to make ID take value of ACC
# 3) Check to see how many genuinely new sequences have arrived
# 4) Delete sequences used already.
# 5) write a single file species.date.embl
# 
# Assumes various filename conventions
# $ALL_KNOWN_ID_FILE - file of the Accessions of all sequences used so far
#
# This script will need extensions to discover last run-date and
# to handle local directory hierarchy.
# *** IMPORTANT ***
# This file needs to be owned by <SOMEONE> and have its setuid bit set.
# su <SOMEONE>; chmod u+s ; chmod go-w etc.
# ============================================================================
# Script-wide settings
# --------------------
VERSION="1.0"                   # Jeremy's Original
PROGRAM_NAME=${0##*/}           # invocation name with any leading directory removed

# Site-specific hosts, directories and helper programs
SPECIES_DIR="/ebi/est/est2/projects"
SRS_SERVER="circinus"
#SRS_PREP_COMMAND="eval 'source /ebi/srs/srs5.1/etc/prep_srs'";
SRS_GETZ_BINARY="/ebi/srs/srs5.1/bin/irix64/getz"
ACCESSION2IDPROG="/homes/jparsons/bin/all/emblacc2id"
EMBL_REMOVAL_PROG="/homes/jparsons/bin/all/emblexclude"
TEMP_DIR="/tmp"
GET_LATEST_SEQS_LOGFILE="${SPECIES_DIR}/${PROGRAM_NAME}.log"

# File-name building blocks
EMBL_FORMAT_EXT="embl"
NEW_ID_FILE_ROOT_NAME="new_seqs"
ALL_KNOWN_ID_FILE="known_seqs"
DUPLICATE_FILE_ROOT_NAME="duplicates"

# Per-run values: process ID and today's date as YYYYMMDD
UNIQUE_ID=$$
TODAY=$(date '+%Y%m%d')

WARNING_LIMIT=100000            # Disk Space (in k) left before issuing warning.
SPECIES_NAMES="human mouse rat zfish"

# Make sure all members of this group can write to anything created.
umask 002
# cd ${SPECIES_DIR};
# ========================================================================
print_usage () {
        # Write the one-line command synopsis, prefixed with the name the
        # script was invoked as, to standard output.
        printf '%s\n' "$0 [embl|emblnew] SRS_species_selector SRS_date_selector output_filename_head"
}
# ========================================================================
# Banner
# ------
echo "# ${PROGRAM_NAME} Version ${VERSION}"
# At least four positional parameters are required; otherwise show the
# synopsis and stop with status 1.
[ $# -ge 4 ] || { print_usage; exit 1; }
# Positional parameters, in order: SRS library (embl or emblnew), species
# selector, date selector, and the head of the output file names.
srs_libs_name=$1
srs_species_name=$2
srs_date_string=$3
output_filename_head=$4
# ============================================================================
echo "# Launching the rsh to get the sequences for $srs_species_name since $srs_date_string ."
# Scratch file for the reformatted SRS output; the date and process ID are
# part of the name.
SRS_output_file="${TEMP_DIR}/${output_filename_head}.${TODAY}.${UNIQUE_ID}"
# The getz command line that is run on the SRS server: RNA entries from the
# chosen library, for the chosen organism, dated from srs_date_string on.
# The single quotes are part of the string so that they reach the remote
# shell and protect the query there.
# SRS_query="${SRS_GETZ_BINARY} -e '[libs={embl}-molecule: RNA] & [libs-Organism: ${srs_species_name}] & [libs-Date# ${srs_date_string}:]'"
SRS_query="${SRS_GETZ_BINARY} -e '[libs={${srs_libs_name}}-molecule: RNA] & [libs-Organism: ${srs_species_name}] & [libs-Date# ${srs_date_string}:]'"
# rsh ${SRS_SERVER} ${SRS_PREP_COMMAND} \>\& /dev/null\;${SRS_query} | ${ACCESSION2IDPROG} > ${SRS_output_file}
# The query is passed as ONE quoted word: left unquoted, the local shell
# would word-split it and try pathname expansion on the pieces, so a
# wildcard in a selector could be replaced by local file names before the
# command ever reached the server.
rsh "${SRS_SERVER}" "${SRS_query}" | "${ACCESSION2IDPROG}" > "${SRS_output_file}"
# ============================================================================
echo "# Checking for duplicates"
# Work files. Everything under TEMP_DIR carries the process ID so that two
# runs on the same day (e.g. for different species) cannot overwrite each
# other's lists.
new_ID_file="${TEMP_DIR}/${NEW_ID_FILE_ROOT_NAME}.${UNIQUE_ID}"
known_ID_file="${SPECIES_DIR}/${output_filename_head}/${ALL_KNOWN_ID_FILE}"
duplicate_ID_file="${TEMP_DIR}/${DUPLICATE_FILE_ROOT_NAME}.${UNIQUE_ID}"
added_ID_file="${TEMP_DIR}/${NEW_ID_FILE_ROOT_NAME}.${TODAY}.${UNIQUE_ID}"
# final_seq_file="${TEMP_DIR}/${output_filename_head}.${TODAY}.${EMBL_FORMAT_EXT}";
final_seq_file="${SPECIES_DIR}/${output_filename_head}/${TODAY}.${EMBL_FORMAT_EXT}"

# Sorted, de-duplicated list of the identifiers just retrieved (second
# field of every ID line).
grep '^ID' "${SRS_output_file}" | awk '{print $2}' | sort -u > "${new_ID_file}"
retrieved_count=$(wc -l < "${new_ID_file}")
echo "# Total entries retrieved= $retrieved_count"

# First run for this species: start with an empty list of known IDs.
if [ ! -r "${known_ID_file}" ]
then
        touch "${known_ID_file}"
fi

# comm needs both inputs sorted: the new list was sorted above and the known
# list is rewritten with sort -u at the end of every run.
#   -12 keeps the IDs present in both files (already used),
#   -23 keeps the IDs present only in the new list (genuinely new).
comm -12 "${new_ID_file}" "${known_ID_file}" > "${duplicate_ID_file}"
comm -23 "${new_ID_file}" "${known_ID_file}" > "${added_ID_file}"
new_count=$(wc -l < "${added_ID_file}")

# Write the final sequence file; the removal program is given the list of
# duplicate IDs and the retrieved entries.
"${EMBL_REMOVAL_PROG}" "${duplicate_ID_file}" "${SRS_output_file}" > "${final_seq_file}"
# Could check for agreement in counts here ???

# Backup the known Id file, then add the new list on
cp -p "${known_ID_file}" "${known_ID_file}.bak"
temp_ID_file="${known_ID_file}.${UNIQUE_ID}"
sort -u "${known_ID_file}" "${added_ID_file}" > "${temp_ID_file}"
mv "${temp_ID_file}" "${known_ID_file}"
# ============================================================================
echo "# cleaning up temporary files"
rm -f "${SRS_output_file}" "${new_ID_file}" "${duplicate_ID_file}" "${added_ID_file}"
# =============================================================================
# ========================================================================
# Check that there is enough diskspace
# ---------------------------------
#if [ $biggest -lt $WARNING_LIMIT ] ; then
#       echo ""
#       echo "**** WARNING ******"
#       echo "Space is getting short on $biggest_dir."
#       echo ""
#fi 
# ========================================================================
# Record what has been done in a file
# -----------------------------------
# One tab-separated line per run: date, species (output file head), number
# of sequences added. printf is used because whether echo turns "\t" into a
# tab depends on the shell; with printf the log always gets real tabs.
if [ ! -f "$GET_LATEST_SEQS_LOGFILE" ] ; then
        # New log: write the column header and make it world-readable.
        printf '# Date\tSpecies\tAdded\n' > "$GET_LATEST_SEQS_LOGFILE"
        chmod 644 "$GET_LATEST_SEQS_LOGFILE"
fi
printf '%s\t%s\t%s\n' "${TODAY}" "${output_filename_head}" "${new_count}" >> "$GET_LATEST_SEQS_LOGFILE"
#!/bin/csh -x
# Author Jeremy Parsons
# =================================
# Bugs/Improvements needed:
# 1) This only works on sol4 (need NFS crossmounts)
# 2) Need checks to see if naming service is still running before starting.
# 3) Need to kill old servers if still running.
# ==================================
# Locate the shared environment script on the command search path and read
# it into this shell; only a message is printed when it cannot be read.
set CONF_SCRIPT_NAME="cluster_env_config"
set config_script=`which ${CONF_SCRIPT_NAME}`
if (! -r $config_script) then
        echo "Could not find the environment setup script."
else
        source $config_script
endif
# ========================================
# Command line: exactly two words, the species nickname and the date from
# which sequences are to be selected.
if ($#argv != 2) then
        echo "The script $0 was given the wrong number of parameters."
        echo "Usage: $0 species_nickname(eg. mouse) select_from_date(eg. 19981231)"
        exit(1)
endif
set chosen_species=$1
set chosen_date=$2
set chosen_sp_index="${chosen_species}_index"
# Work inside the species directory below CLUSTER_HOME, creating it on
# first use.
cd ${CLUSTER_HOME}
if (! -d ${chosen_species}) then
        mkdir ${chosen_species}
endif
cd ${chosen_species}
# ========================================
# Keep the existing index safe in case anything goes wrong. The previous
# backup is deleted first to make room.
echo "# Making new backups of any current indexes for ${chosen_species}"
rm -rf "${chosen_sp_index}.bak"
# When an index already exists, copy its files one by one into the ".new"
# directory that will be extended. The ".new" directory is only created if
# missing, so that it may be prepared beforehand with links that spread the
# files over different disks.
if ( -d ${chosen_sp_index}) then
        if (! -d ${chosen_sp_index}.new ) mkdir ${chosen_sp_index}.new
        cd ${chosen_sp_index}
        foreach index_file (*)
                cp -pf ${index_file} ../${chosen_sp_index}.new
        end
        cd ..
endif
# Should really check that copies worked but don't know how to do it easily for links
# =======================================================
# Fetch the new sequences
# KNOWN_SPECIES and KNOWN_SP_SRS_S are treated as parallel lists: walk the
# species list and drop one SRS string per non-matching species, so that the
# first remaining SRS string belongs to the chosen species.
set temp_strings=(${KNOWN_SP_SRS_S})
foreach species (${KNOWN_SPECIES})
        if (${chosen_species} == ${species}) break
        shift temp_strings
end
# The SRS string is stored with "_" in place of blanks; turn them back.
set chosen_sp_srs_s=`echo ${temp_strings[1]} | tr '_' ' '`
# Fix the date string now, before the retrieval starts, in case of long
# overnight runs.
set todays_date=`date '+%Y%m%d'`
set new_seqs_file="${todays_date}.embl"
getnewseqs emblnew "${chosen_sp_srs_s}" "${chosen_date}" "${chosen_species}"
# Give up when the expected output file is not readable in this directory.
if (! -r ${new_seqs_file}) then
        echo "$0 Unable to find the file of new sequences for ${chosen_species}"
        exit(1)
endif
# ========================================================
# Bring up PVM
# The console is driven from a command file: one "add" line per slave host,
# an empty line, then "quit" to leave the console.
set pvm_commands=/tmp/temp_pvm_input.$$
rm -f ${pvm_commands}
foreach slave (${PVM_SLAVE_HOSTS})
        echo "add ${slave}" >>! ${pvm_commands}
end
echo "" >> ${pvm_commands}
echo "quit" >> ${pvm_commands}
pvm < ${pvm_commands}
# ========================================================
# Run JESAM on the new sequence file, adding to the ".new" index
jesam -anti-sense y -idbases 5 -disbases -15 -gapstart -15 -gapcont -15 -index ${chosen_sp_index}.new -slaves ${PVM_SLAVE_COUNT} -slavehost remote -cache ${DIST_CACHE_SCALE} -seqfiles ${new_seqs_file}
# Re-use the command file to send "halt" to PVM once JESAM has finished
echo "halt" >! ${pvm_commands}
pvm < ${pvm_commands}
# ========================================================
# Start up the temporary alignserver
# It is run in the background against the ".new" index. The second argument
# is a per-process file name under /tmp; the cluster server started later is
# given the same path as a file: URL (presumably alignserver writes its IOR
# there -- confirm against alignserver itself).
alignserver ${chosen_sp_index}.new /tmp/alignmentDB.ior.$$ &
# Get Process ID of alignserver but cannot pipe output of jobs, and csh has no $! variable
# So "jobs -l" is written to a scratch file and the PID is cut out of
# columns 8-15 of its last line, i.e. the most recently listed job.
jobs -l >! /tmp/jobs_output.$$
set tmp_align_pid=`tail -1 /tmp/jobs_output.$$ | cut -c8-15 | awk '{print $1}'`;
# ========================================================
# Start up the temporary name server
# Pause before continuing (presumably to give the alignserver time to come
# up -- confirm).
sleep 10
# Work on a copy of the cluster database so that the production file
# "clusterdb" stays untouched until the final swap.
if ( -f clusterdb.next) rm -f clusterdb.next;
cp -pf clusterdb clusterdb.next
# nameserv's standard output (the IOR) goes to a per-process file; anything
# else the subshell prints is discarded.
(nameserv --ior >! /tmp/naming.ior.$$) >& /dev/null &
# Same PID extraction as above, this time for the name server job.
jobs -l >! /tmp/jobs_output.$$
set tmp_name_pid=`tail -1 /tmp/jobs_output.$$ | cut -c8-15 | awk '{print $1}'`;
# ========================================================
# Start up the temporary clusterdb
echo "# Clustering the new sequences";
# The appearance of this file is what the wait loop below looks for, so a
# leftover copy must not exist.
rm -f /tmp/clusterDB.ior.$$;
# (env CLASSPATH=${CLASSPATH}:${CLUST_CLASS_DIR}/${CLUSTER_JAR} java -mx100M embl.ebi.est.cluster.SuperClusterServer file:/tmp/naming.ior.$$ file:/tmp/alignmentDB.ior.$$ /tmp/clusterDB.ior.$$ clusterdb.next ${chosen_species} >& /dev/null; sleep 10) &
env CLASSPATH=${CLASSPATH}:${CLUST_CLASS_DIR}/${CLUSTER_JAR} java -mx100M embl.ebi.est.cluster.SuperClusterServer file:/tmp/naming.ior.$$ file:/tmp/alignmentDB.ior.$$ /tmp/clusterDB.ior.$$ clusterdb.next ${chosen_species} &
# PID of the cluster server: last line of "jobs -l", columns 8-15.
jobs -l >! /tmp/jobs_output.$$
set tmp_cluster_pid=`tail -1 /tmp/jobs_output.$$ | cut -c8-15 | awk '{print $1}'`;
# Wait for the clusterdb to finish
# Every 30 seconds: if the server process has gone and the IOR file still
# does not exist, clustering has failed.
while (! -f /tmp/clusterDB.ior.$$)
        set running=`ps -o pid -p ${tmp_cluster_pid} | grep -v "PID" | wc -l`;
        if ((! ${running}) && (! -f /tmp/clusterDB.ior.$$)) then
                echo "# Error clustering the new sequences for ${chosen_species}";
                # Do not leave the temporary name and alignment servers running,
                # nor their scratch files behind. The files are removed first
                # and kill comes last because the script is on its way out
                # anyway if kill itself reports an error.
                rm -f ${pvm_commands} /tmp/jobs_output.$$ /tmp/naming.ior.$$ /tmp/alignmentDB.ior.$$;
                kill ${tmp_name_pid} ${tmp_align_pid};
                exit (1);
        endif
        sleep 30
end
sleep 10
# Copy over the new indexes before killing the old
cp -pfr ${chosen_sp_index}.new ${chosen_sp_index}.next;
# Kill all the temporary servers
kill ${tmp_cluster_pid} ${tmp_name_pid} ${tmp_align_pid};
# Kill all the running production servers
# (their PIDs are the first field of the PID files, when those exist)
if (-f ${CLUSTER_PID_NAME}) kill `cut -f1 ${CLUSTER_PID_NAME}`
if (-f ${ALIGN_PID_NAME}) kill `cut -f1 ${ALIGN_PID_NAME}`
# Rotate: current index -> .bak, .next -> current; same for the cluster
# database.
if (-d ${chosen_sp_index}) mv ${chosen_sp_index} ${chosen_sp_index}.bak
mv ${chosen_sp_index}.next ${chosen_sp_index}
if (-f clusterdb) mv clusterdb clusterdb.bak
mv clusterdb.next clusterdb
# Bring the production servers back up on the rotated files
startOneSpecies ${chosen_species};
# ==========================================================
# Cleaning up
# Besides the PVM command file, remove the scratch files this run created
# under /tmp: the "jobs -l" capture and the IOR files of the temporary
# servers, which were killed above.
rm -f ${pvm_commands} /tmp/jobs_output.$$ /tmp/naming.ior.$$ /tmp/alignmentDB.ior.$$ /tmp/clusterDB.ior.$$;
Starting server for one species
#!/bin/csh
# Author Jeremy Parsons
# =================================
# Bugs/Improvements needed:
# 1) This only works on sol4 (need NFS crossmounts)
# 2) Need checks to see if naming service is still running before starting.
# 3) Need to kill old servers if still running.
# ==================================
# Find the shared environment script on the command search path and read it
# into this shell; only a message is printed when it cannot be read.
set CONF_SCRIPT_NAME="cluster_env_config"
set config_script=`which ${CONF_SCRIPT_NAME}`;
if ( -r $config_script) then
        source $config_script;
else
        echo "Could not find the environment setup script.";
endif
# ========================================
# Parse parameters
# Exactly one word is expected: the species nickname.
if (${#argv} != 1) then
        echo "The script $0 was given the wrong number of parameters.";
        echo "Usage: $0 species_nickname(eg. mouse)";
        exit(1);
endif
set chosen_species=${argv[1]};
set chosen_sp_index="${chosen_species}_index";
cd ${CLUSTER_HOME};
# Without the species directory there is nothing to serve: stop here with
# status 1, as the same check further down in this script does, instead of
# carrying on into a failing cd.
if (! -d ${chosen_species}) then
        echo "Script $0 could not find a directory called ${chosen_species} in ${CLUSTER_HOME}";
        exit (1);
endif
cd ${chosen_species};
# ==========================================================
# Start Name server if needed
# "running" stays 0 unless a PID file exists and ps still lists that PID
# (any output left after dropping the header line gives a non-zero count).
set running=0
if ( -r ${NAMING_PID_FILE}) then
        set temp_pid=`cut -f1 ${NAMING_PID_FILE}`
        set running=`ps -p ${temp_pid} | grep -v PID | wc -c`
endif
if (${running} == 0) then
        # nameserv's standard output (the IOR) goes to NAMING_IOR_FILE,
        # everything else from the subshell is discarded.
        ( nameserv --ior > ${NAMING_IOR_FILE} ) >& /dev/null &
        # Record the PID: last line of "jobs -l", columns 8-15.
        jobs -l >! /tmp/jobs_output.$$
        tail -1 /tmp/jobs_output.$$ | cut -c8-15 | awk '{print $1}' >! ${NAMING_PID_FILE}
endif
# ==========================================================
# Alignment server: stop a running one, then start afresh
cd ${CLUSTER_HOME}
if (! -d ${chosen_species}) then
        echo "Script $0 cannot find the directory ${chosen_species} in ${CLUSTER_HOME}"
        exit (1)
endif
cd ${chosen_species}
# "running" becomes non-zero only when the PID file is readable and ps still
# lists the PID stored in it.
set running=0
if ( -r ${ALIGN_PID_NAME}) then
        set temp_pid=`cut -f1 ${ALIGN_PID_NAME}`
        set running=`ps -p ${temp_pid} | grep -v PID | wc -c`
endif
if (${running}) then
        kill ${temp_pid}
endif
# Remove the old IOR file so that the wait loop below only sees a new one.
rm -f alignmentDB.ior
echo "# About to start alignment server for ${chosen_species}"
alignserver ${chosen_sp_index} alignmentDB.ior >& /dev/null &
# Record the PID: last line of "jobs -l", columns 8-15.
jobs -l >! /tmp/jobs_output.$$
tail -1 /tmp/jobs_output.$$ | cut -c8-15 | awk '{print $1}' >! ${ALIGN_PID_NAME}
# Poll every 30 seconds until the IOR file is readable, then allow another
# 30 seconds to be sure that the alignment server is running.
while (! -r alignmentDB.ior)
        sleep 30
end
sleep 30
# ==========================================================
# Now do the cluster server
echo "# About to start cluster server for ${chosen_species}";
# Started in the background with the cluster jar appended to CLASSPATH for
# this one command; all output is discarded.
env CLASSPATH=${CLASSPATH}:${CLUST_CLASS_DIR}/${CLUSTER_JAR} ${MY_JAVA} -mx100M embl.ebi.est.cluster.SuperClusterServer file:${NAMING_IOR_FILE} file:$PWD/alignmentDB.ior  clusterDB.ior clusterdb ${chosen_species} >& /dev/null &
# Record the PID: last line of "jobs -l", columns 8-15.
jobs -l >! /tmp/jobs_output.$$
tail -1 /tmp/jobs_output.$$ | cut -c8-15 | awk '{print $1}' >! ${CLUSTER_PID_NAME};
# The "jobs -l" scratch file has served its purpose for all three servers;
# do not leave one behind in /tmp for every run.
rm -f /tmp/jobs_output.$$;
cd ..
# ==========================================================
#!/bin/csh
# Author Jeremy Parsons
# =================================
# Bugs/Improvements needed:
# 1) This only works on sol4 (need NFS crossmounts)
# 2) Need checks to see if naming service is still running before starting.
# 3) Need to kill old servers if still running.
# ==================================
# Find the shared environment script on the command search path and read it
# into this shell; only a message is printed when it cannot be read.
set CONF_SCRIPT_NAME="cluster_env_config"
set config_script=`which ${CONF_SCRIPT_NAME}`
if (! -r $config_script) then
        echo "Could not find the environment setup script."
else
        source $config_script
endif
# ==========================================================
# Name server: its standard output (the IOR) goes to NAMING_IOR_FILE,
# anything else from the subshell is discarded.
( nameserv --ior > ${NAMING_IOR_FILE} ) >& /dev/null &
# ==========================================================
# One background alignment server per entry of the "species" list, each
# started from inside its own directory below CLUSTER_HOME.
# NOTE(review): "species" is not set in this script; presumably it comes
# from the environment setup script -- confirm.
cd ${CLUSTER_HOME}
foreach db (${species})
        cd ${db}
        alignserver ${db}_index alignmentDB.ior >& /dev/null &
        cd ..
end
# ==========================================================
# Start the trace server
setenv CLASSPATH ${CLASSPATH}:${CLUST_CLASS_DIR}/${TRACE_APPLET_JAR}
# The name server was only just put in the background, and the command line
# below reads its IOR file at once with cat. Give the file up to 60 seconds
# to appear with some content; after that carry on regardless, as before.
set ior_wait=0
while (((! -e ${NAMING_IOR_FILE}) || (-z ${NAMING_IOR_FILE})) && (${ior_wait} < 60))
        sleep 1
        @ ior_wait++
end
${MY_JAVA} embl.ebi.trace.TraceServer ${WEBESTDIR}/TraceServer.config -ORBnaming `cat ${NAMING_IOR_FILE}` >& /dev/null &
# ==========================================================
# Need to wait a while to be sure that all the alignment servers are running
sleep 60
# ==========================================================
# Cluster servers: one per entry of the "species" list, started in the
# background from inside the species directory with all output discarded.
# The cluster jar is appended to CLASSPATH first.
setenv CLASSPATH ${CLASSPATH}:${CLUST_CLASS_DIR}/${CLUSTER_JAR}
foreach db (${species})
        cd ${db}
        ${MY_JAVA} -mx100M embl.ebi.est.cluster.SuperClusterServer file:${NAMING_IOR_FILE} file:${PWD}/alignmentDB.ior  clusterDB.ior clusterdb ${db}  >& /dev/null &
        cd ..
end
# ==========================================================
# Now do all the music database startups :-)
# Runs from CDDB_HOME with the CDDB jar appended to CLASSPATH; standard
# output and standard error of the server are piped to mailx.
cd ${CDDB_HOME}
setenv CLASSPATH ${CLASSPATH}:${CLUST_CLASS_DIR}/${CDDB_JAR}
${MY_JAVA} EMBL.EBI.CDDB.MusicDBServer myDB myDB.ref | & mailx -s "MusicDB Message" jparsons &