!*** RFindPatterns *********************************************************** ! David Mathog, Biology Division, Caltech ! 8-DEC-1994 ! ! Modified so that rather than outputting the actual sequence found ! it outputs a series of files containing the sequence. ! ! It MUST be compiled FORTRAN/EXTEND! ! ! rfindpatterns also has these extra switches: ! /LWIDTH number of bases/amino acids to include before the pattern ! /RWIDTH number of bases/amino acids to include after the pattern ! /replace sequence to insert INSTEAD of the pattern ! /prefix filenames created are prefix#.suffix ! /suffix ! /directory files are placed in directory // prefix#.suffix ! Note that if the pattern is DNA and is found on the reverse strand, ! it is reversed before being written out. ! !*** FindPatterns *********************************************************** !* !* finds simple patterns in DNA or protein sequences. !* !* Copyright (c) April 1985 John Devereux !* Revised for version 4, February 1986 !* !****************************************************************************** Program RFindPatterns Implicit None Include 'GenInclude:EnzData.inc' Include 'GenInclude:FindBlock.Inc' Integer Files, TotFiles Integer NCuts, TotCuts Integer Len, TotLen, State Integer LWidth,RWidth Character*1 Replace(256),Prefix(128),Suffix(128),directory(256) Logical Done, Batch Logical GetParameters Call GCGInit('Rfindpatterns') Call Doc() Call CLComCheck() Done = .FALSE. Batch = .FALSE. Call CLGetBool('BATch', Batch) State = FINDALL Do While ( .not.Done ) If ( GetParameters(Batch, State, LWidth,RWidth 1 ,Replace,prefix,suffix,directory) ) Then If ( Batch ) Call CLSubmit ! program won't return here! !* find and show the patterns from all the sequences files Call FindPats(Files, NCuts, Len, LWidth,RWidth, 1 Replace,prefix,suffix,directory) TotFiles = TotFiles + Files TotLen = TotLen + Len TotCuts = TotCuts + NCuts !* wake up the user If ( Files.ge.20 .and. .not. Terminal ) Call RingBell(1) !* just get more patterns from the interactive user If ( Interactive .and. & Files.gt.0 .and. & .not.Cyclic ) then Call WriteF('\n') State = FINDMOREPATS !* and tell him the bad news about his empty filespec Else if ( files.eq.0 ) then Call WriteF('\n\b *** No files in "%s"! ***\n', WildFName) If ( Cyclic ) Then State = FINDMOREFILES !look for more files and patterns Else Done = .TRUE. End If !* get another filespec and go again Else if ( Cyclic ) then Call WriteF('\n') State = FINDMOREFILES !look for more files and patterns Else Done = .TRUE. ! a batch job. End If Else ! no files or patterns to search Done = .TRUE. End If End Do ! looping through files and patterns Call Summarize(TotCuts, TotLen, TotFiles) ! which calls Account Call WriteF('\S') End ! of RFindPatterns !**** GetParameters ******************************************************** !* !* Gets and checks for validity all necessary user supplied input !* for RFINDPatterns. !* !***************************************************************************** Logical Function GetParameters(Batch, State, LWidth,RWidth 1 ,Replace,prefix,suffix,directory) Implicit None Integer I__ ! CallToFunc conversion Logical L__ ! CallToFunc conversion Include 'GenInclude:EnzData.inc' Include 'GenInclude:FindBLock.inc' Include 'GenInclude:SqConstants.Inc' Integer MAXMISMATCH Parameter ( MAXMISMATCH=15) integer LWidth,RWidth Character Replace(*),prefix(*),suffix(*),directory(*) Logical Batch Integer State, I Character PatString(1024) Character OutFName(256), PatFName(256) Character Text(64) Logical SetAmbiguity, KnowOutFile Logical LTemp Logical CLRetBool, CLGetBool, IsWild, GetPatterns, PortIsTerm Logical CLGetWildFName, CLGetNewFName, CLGetInt, CLNoInteract Logical CLNoArg, CIStrMatch Integer GetIntNum, CLRetInt, GetString, StrFind, Str_Len GetParameters = .TRUE. ! Initial time through, look for all booleans and command line qualifiers... If ( State.eq.FINDALL ) then Cyclic = .FALSE. Interactive = .TRUE. Monitor = .not.CLNoInteract() L__= CLGetBool('MONitor', Monitor) L__= CLGetBool('ONEstrand', OneStrand) L__= CLGetBool('SIXbase', SixBase) L__= CLGetBool('APPend', Append) L__= CLGetBool('Terminal', Terminal) L__= CLgetBool('NAMes', Names) L__= CLGetBool('CIRcular', Circular) If ( CLRetBool('LINear') ) Circular = .FALSE. MinCuts = 1 MaxCuts = MAXSEQLEN MaxCuts = CLRetInt('MAXCuts', 1, MaxCuts, MaxCuts) MinCuts = CLRetInt('MINCuts', 0, MaxCuts, MinCuts) ! Mathog, set up width, RANGE 0<->2000, DEFAULT 5 LWidth = CLRetInt('LWIDth', 0, 20000, 5) RWidth = CLRetInt('RWIDth', 0, 20000, 5) call CLGetStr('REPlace',replace) !note, no error checking at this point, neither prefix nor suffix should !contain a . or : that would interfere with filename creation!!! call CLGetStr('PREfix',prefix) if(str_len(prefix) .eq. 0)call FromStrVar(prefix,'r') call CLGetStr('SUFfix',suffix) if(str_len(suffix) .eq. 0)call FromStrVar(suffix,'rfind') call CLGetStr('DIRectory',directory) If ( CLRetBool('SHOw') ) MinCuts = 0 If ( CLRetBool('ONCe') ) MaxCuts = 1 MisMatch = 0 L__= CLGetInt('MISmatch', 0, MAXMISMATCH, Mismatch) SetAmbiguity = CLNoArg('MISmatch') Perfect = CLRetBool('PERFect') Superset = (CLRetBool('ALL') .or. & CLRetBool('SUPERset') ) .and. & .not.Perfect Text(1) = Char(0) If ( Circular ) Call StrConcat(Text, ' (Circular)') If ( SixBase ) Call StrConcat(Text, ' (Six Base)') If ( Superset ) Call StrConcat(Text, ' (Overlap-set)') If ( Perfect ) Call StrConcat(Text, ' (Perfect)') End If ! First time through ! Look for the files to search. Find loops back and asks the user for ! more files to search after a search is complete... If ( State.eq.FINDALL .or. & State.eq.FINDMOREFILES ) Then !* get the wild file spec for the sequences WildFName(1) = Char(0) LTemp = .not.CLGetWildFName('INfile1', 1, WildFName) If ( LTemp .and. & .not.CLNoInteract() ) then Call WriteF('\n%s RFINDPATTERNS in what sequence(s) ? ', & Text) If ( GetString(WildFName).eq.0 ) then If ( Cyclic ) then GetParameters = .FALSE. Return Else Call WriteF('\n\s') End If End If Cyclic = .true. ! loop back if file is on command line Else Cyclic = .false. ! no loop back if file is on command line End If If ( WildFName(1).eq.Char(0) ) then If ( Cyclic ) Then GetParameters = .FALSE. Return End If Call WriteF('\n\b *** ERROR, RFINDPATTERNS requires an'// & ' input file! ***\n\s'//Char(0)) End If End If ! first time through, or looping for more files !* get the patterns from a local data file or interactively Interactive = GetPatterns(PatFName, NPats) If ( NPats.eq.0 ) then If ( Cyclic ) then GetParameters = .FALSE. Return Else Call WriteF('\n\s') End If End If !* get the mismatch If ( SetAmbiguity ) then If ( .not.CLNoInteract() ) then Call WriteF('\n How many mismatches will you allow'// & ' (* %d *) ? '//Char(0), MisMatch) MisMatch = GetIntNum('MisMatch', 0, MAXMISMATCH, MisMatch) End If End If !* identify and open the outfile If ( .not. KnowOutFile ) then If ( Terminal ) then Call StrCopy(OutFName, 'Term') Else if ( WildFName(1).eq.'@' ) then Call StrCopy(OutFName, WildFName(2)) Call MapCase(OutFName) Call NewFileType(OutFName, '.com') Else if ( .not.IsWild(WildFName) ) then Call NextFile(1, WildFName, OutFName) Call NewFileType(OutFName, '.com') Else Call StrCopy(OutFName, 'rfindpatterns.com') End If If ( .not. Terminal ) then ! If ( .not.CLGetNewFName('OUTfile1', 2, OutFName) ) Then ! If ( .not.CLNoInteract() ) then ! Call WriteF('\n What should I call the output file (*'// ! & ' %s *) ? '//Char(0), OutFName) ! I__= GetString(OutFName) ! If ( CIStrMatch('Term', OutFName) ) ! & Call StrCopy(OutFName, 'GCGStdOut') ! End If ! End If Terminal = PortIsTerm(OutFName) End If ! If ( .not.Batch ) Call OpenFile(OutFile, OutFName, 'w') KnowOutFile = .true. End If If ( .not.Interactive ) & Call ValidatePatterns(OutFile, Patterns, NPats) !* Limited documentation in the output file for Rfindpatterns ! If ( .not.Terminal .and. .not.Batch ) then ! Call FWriteF(OutFile, ! & '$!%s RFINDPATTERNS on %s allowing %d mismatches\n', ! & Text, WildFName, MisMatch) ! End If ! write the qualifier to the command line for submitting to batch If ( Batch ) Then Call OpenFile(OutFile, OutFName, 'wd') Call CloseF(OutFile) Call CLWriteF('INfile1', '=%s', WildFName) Call CLWriteF('OUTfile1', '=%s', OutFName) If ( SetAmbiguity ) Call CLWriteF('MISmatch', '=%d', MisMatch) If ( Interactive ) Then PatString(1) = Char(0) Do i = 1, NPats If ( i.gt.1 ) Call StrConCat(PatString, ',') If ( StrFind(',',Patterns(i).RecSite).ne.0 ) Then Call SWriteF(PatString,'~"%s"', Patterns(i).RecSite) Else Call StrConCat(PatString, Patterns(i).RecSite) End If End Do Call CLWriteF('PATterns','=%s', PatString) Else If ( PatFile.gt.0 ) Call CLWriteF('DATafile', '=%s', PatFName) End If End If End ! of GetParameters !*** FindPats *********************************************************** !* !* find the patterns in the sequences of WildFile. !* !****************************************************************************** Integer Function FindPats( Sequences, TotCuts, TotLen, 1 LWidth,RWidth,Replace,prefix,suffix,directory) Implicit None Include 'GenInclude:Sequence.inc' Include 'GenInclude:EnzData.inc' Include 'GenInclude:FindBlock.Inc' Integer Sequences, TotCuts, TotLen Integer LWidth,RWidth Character Replace(*),prefix(*),suffix(*),directory(*) Record / Sequence / Sq Character Cuts(MAXSEQLEN+COVERLAP+1) Character CSeq(-COVERLAP:MAXSEQLEN+COVERLAP+1) Character Pattern(MAXSITELEN+1) Integer pat, Strand, SeqBegin, UseLen Integer NCuts, SeqCuts, FromTo(20) Logical Done Integer Offset/1/, PeriodMultiple/40/ Integer Str_Len ! make the parameter REVERSE be the same as fortran TRUE so when we need ! to send a flag to showfinds indicating whether it was the reverse, we ! can just use Strand. Integer REVERSE, FORWARD Parameter ( REVERSE = -1, FORWARD = 0) Integer AddCuts Logical SQNext Logical ExcludeCuts Sequences = 0 TotCuts = 0 TotLen = 0 SeqBegin = 1 If (NPats.eq.0) Return Do While ( SQNext(WildFName, Sq) ) SeqCuts = 0 If (Sequences.eq.0) Perfect = Perfect .or. (SQ.Type.eq.SQPROTEIN) Sequences = Sequences + 1 TotLen = TotLen + Sq.Len ! Mathog, for rfindpatterns, always want the full name ! If ( Names ) then Call CapFields(Sq.Name) !CVMS ! Else ! Call BaseName(Sq.Name) ! End If If ( .not.Terminal .and. Monitor ) & Call WriteF('\n%20s len: %10D ', Sq.Name, Sq.Len) !* circularize (this doesn't make a whole lot of sense for proteins) If ( Circular ) then Call Circularize(CSeq, Sq.Seq(1)) Call StrCopy(Sq.Seq, CSeq(1)) If ( .not.Perfect ) then Call SeqToCode(CSeq(SeqBegin-COVERLAP)) Else Call StrToUpper(CSeq(SeqBegin-COVERLAP)) End If Else Call StrCopy(CSeq(SeqBegin), Sq.Seq) If ( .not.Perfect ) then Call SeqToCode(CSeq(SeqBegin)) Else Call StrToUpper(CSeq(SeqBegin)) End If End If UseLen = Str_Len(Sq.Seq) !* find the patterns Strand = FORWARD Pat = 0 Done = .FALSE. Do While ( .not.Done ) If ( Strand.eq.FORWARD ) Then Pat = Pat+1 Call StrCopy(Pattern, Patterns(Pat).RecSite) Else Call StrCopy(Pattern, Patterns(Pat).BotSite) End If If ( Patterns(pat).Use ) then NCuts = AddCuts(CSeq, 1, Sq.Len, & Pattern, Cuts, MisMatch, & Circular, Superset, Perfect, Offset ) TotCuts = TotCuts + NCuts If ( Monitor .and. Mod(pat,PeriodMultiple).eq.0 ) & Call WriteF('.') ! show the patterns in the output file and increment the cuts If ( NCuts.ge.MinCuts .and. & NCuts.le.MaxCuts .and. & (.not.ExcludeCuts(Cuts, & 1, Sq.Len, Circular, FromTo) ) ) then If ( Names ) then SeqCuts = SeqCuts + NCuts Else Call ShowFinds(OutFile, Sq.Name, pat, Sq.Doc, Sq.Seq, & Sq.Len, UseLen, Sq.Check, Cuts, & Pattern, Patterns(pat).Name, & NCuts, SeqCuts, Names, Patterns(pat).IsPattern, Strand, & LWidth,RWidth,Replace,SQ.type, & prefix,suffix,directory) End If End If !* clean up the cuts data structure If ( NCuts.gt.0 ) Call ClearCuts(Cuts, 1, Sq.Len) End If If ( Strand.eq.FORWARD ) then If ( .not.Perfect .and. & Patterns(Pat).BotSite(1).ne.Char(0) ) & Strand = REVERSE Else Strand = FORWARD End If If ( Strand.eq.FORWARD .and. pat.ge.NPats ) Done = .TRUE. End Do ! pattern loop ! show file name and number of cuts in output file if /NAMes on command line If ( Names .and. SeqCuts.gt.0 ) & Call ShowFinds(OutFile, Sq.Name, pat, Sq.Doc, Sq.Seq, & Sq.Len, UseLen, Sq.Check, Cuts, & Patterns(pat).RecSite, Patterns(pat).Name, & NCuts, SeqCuts, Names, Patterns(pat).IsPattern, .FALSE., & LWidth,RWidth,Replace,SQ.type, & prefix,suffix,directory) End Do ! NextFile If (TotCuts.gt.0 .and. Terminal ) Call WriteF('\n') FindPats = TotCuts Return End ! of FindPats !*** GetPatterns ************************************************************ !* !* gets the patterns from a file or queries the user for the !* patterns and fills the data structure Patterns with the data !* from each one. GETPATTERNS returns true if the patterns were !* loaded interactively. !* !****************************************************************************** Logical Function GetPatterns(PatFName, Count) Implicit None Include 'GenInclude:Enzdata.inc' Include 'GenInclude:FindBlock.Inc' Character PatFName(*) Integer Count, PatSetCount Character Pattern(MAXSITELEN+1) Character Name(1000) Logical MyFile, PatSet, ComLineSet Integer Calls/0/ Logical GetDataFile, CLRetBool, CLNoInteract Logical CLGetOldFName, CLArg, CheckPattern Integer ReadPatterns, Str_Len If ( Calls.eq.0 ) then Call StrCopy(PatFName, 'pattern.dat') ComLineSet = CLArg('PATterns') PatSet = ComLineSet .or. CLNoInteract() MyFile = CLRetBool('DATafile') .or. CLRetBool('MYFile') End If !* the user specified pattern file If ( Calls.eq.0 .and. MyFile ) then If ( .not.CLGetOldFName('DATafile', 3, PatFName) ) Then If ( .not.CLNoInteract() ) then Call WriteF('\n What pattern data file (* %s *) ? ', & PatFName) Call GetString(PatFName) End If End If Call OpenFile(PatFile, PatFName, 'r') Count = ReadPatterns(PatFile, SixBase, OneStrand, Patterns) Interactive = .false. !* the local data file "Pattern.dat" Else If ( Calls.eq.0 .and. GetDataFile(PatFile, PatFName) .and. & .not.ComLineSet ) then Count = ReadPatterns(PatFile, SixBase, OneStrand, Patterns) If ( .not.CLNoInteract() ) Call WriteF & ('\n\b Search patterns read from "%s"\n', PatFName) Interactive = .false. !* get the patterns interactively and return true Else if ( Interactive ) then Call CloseF(PatFile) ! GetDataFile opened it !!! If ( .not.PatSet ) then If ( Calls.eq.0 ) then Call WriteF('\n'// & ' Enter patterns individually, one per line.\n'// & ' End the list with a blank line.\n\n'//Char(0)) Else Call WriteF('\n') End If End If Pattern(1) = '#' Count = 0 PatSetCount = 1 ! for reading patterns from /pat=x,y,z Do While ( Pattern(1).ne.Char(0) ) 100 Pattern(1) = Char(0) If ( .not.PatSet ) then Call WriteF(' Pattern %d: ', Count+1) Call GetMenu(Pattern) Else Call CLGetStrList('PATterns', Pattern, PatSetCount) PatSetCount = PatSetCount + 1 End If !* is this pattern legit?? If ( Str_Len(Pattern).gt.MAXSITELEN ) then Call WriteF('\n\b More than %D characters'// & ' in "%s"!\n\n'//Char(0), MAXSITELEN, Pattern) If ( PatSet ) Call WriteF('\n\s') Go to 100 Else if ( Str_Len(Pattern).gt.0 ) then Count = Count + 1 Call SWriteF(Name, '%d', Count) Call CopyPattern(Patterns(Count), Name, Pattern, & SixBase, OneStrand) If ( .not.CheckPattern(Patterns(Count))) Then Call WriteF('\n\b Invalid Syntax in Pattern "%s".'// & ' Pattern will not be used.\n'//Char(0), & Pattern) Call ClearPattern(Patterns(Count)) Count = Count - 1 Go To 100 End If End If End Do Interactive = .not.PatSet End If If ( .not.Interactive .and. Count.eq.0 .and. .not.PatSet) then Call WriteF('\n No legitimate patterns in "%s".\n', PatFName) Call WriteF('\n\s') End If Calls = -1 GetPatterns = Interactive Return End ! of GetPatterns !*** ValidatePatterns ******************************************************** !* !* Verify that all the patterns have valid syntax. !* !******************************************************************************* Subroutine ValidatePatterns(OutFile, Patterns, NPats) Implicit None Include 'GenInclude:Enzdata.inc' Record /EnzymeData/ Patterns(*) Integer OutFile Integer NPats Integer i Logical CheckPattern Do i = 1, NPats If ( .not.CheckPattern(Patterns(i))) Then Patterns(i).Use = .FALSE. ! Call FWriteF(OutFile, '$!Invalid Syntax in Pattern "%s".'// ! & ' Pattern will not be used.\n'//Char(0), ! & Patterns(i).RecSite) End If End Do Return End ! Of ValidatePatterns !*** ReadPatterns *********************************************************** !* !* finds patterns in File and adds them to to Patterns structure !* !****************************************************************************** Integer Function ReadPatterns(File, Six, OneStrand, Patterns) Implicit None Include 'GenInclude:EnzData.inc' Integer File Logical Six, OneStrand Record / EnzymeData / Patterns(*) Integer Count, Length Character String(1000) Character Name(1000), Site(1000) Integer Offset Integer ReadString, SReadF, Str_Len Logical StrIsComment ! initialize Count = 0 Call RewindF(File) Call SkipText(File, '..') ! read file until an enzyme record is found Do While ( ReadString(File, String, Length).ge.0 ) If ( .not.StrIsComment(String) .and. (String(1).ne.';')) then If ( SReadF(String, '%s %d %s', & Name, Offset, Site ).eq.3 ) then ! too many patterns for current data structure? If ( Count.ge.MAXENZCOUNT ) then Call WriteF('\n *** Pattern data file contains '// & 'more than %D entries! ***\n\b'//Char(0), MAXENZCOUNT) ReadPatterns = Count Return End If If ( Str_Len(Site).gt.MAXSITELEN ) Then Call WriteF('\n *** Pattern %s is larger than maximum '// & 'pattern length of %d. \n'//Char(0), Name, MAXSITELEN) Call WriteF('\n Pattern will not be used! \n') Else Count = Count + 1 ! copy this record into a record in Patterns Call CopyPattern(Patterns(Count), Name, Site, Six, OneStrand) End If End If ! SReadF End If ! StrIsComment End Do ! ReadString ReadPatterns = Count Return End ! of ReadPatterns !*** ShowFinds **************************************************************** !* !* displays the finds for this pattern !* !******************************************************************************* Subroutine ShowFinds(File, FName, pat, SeqDoc, Seq, Len, UseLen, & Check, Cuts, PatLine, name, NCuts, SeqCuts, & Names, ispattern, reverse, & LWidth,RWidth,Replace,SQTYPE, & prefix,suffix,directory) Implicit None Integer File Character FName(*) Integer Pat Character SeqDoc(*), Seq(*) Integer Len, Check, UseLen Character Cuts(*) Character PatLine(*), Name(*) Integer NCuts, SeqCuts Logical Names Logical IsPattern, Reverse Integer OldCheck/-1/ Character OldFName(256) Logical CIStrMatch Integer CheckSeq Integer LWidth,RWidth,SQTYPE,count/0/ Character Replace(*),Prefix(*),Suffix(*),Directory(*) !* is this the first find in this sequence !* finds on the opposite strand would cause SHOWFINDS to be called again If ( Check.eq.-1 ) Check = CheckSeq(Seq) If ( .not. ( Check.eq.OldCheck .and. & CIStrMatch(FName, OldFName) ) ) then OldCheck = Check Call StrCopy(OldFName, FName) ! If ( Names ) then ! Call FWriteF(File, '\n%20s ck: %-4d len: %-5D'// ! & ' finds: %-4D ! %.74s\n'//Char(0), ! & FName, Check, Len, SeqCuts, SeqDoc) ! Else ! Call FWriteF(File, '\n%20s ck: %-4d len: %-5D ! %.74s\n', ! & FName, Check, Len, SeqDoc) ! End If End If Call ShowHits(File, pat, SeqDoc, Seq, UseLen, Check, & Cuts, PatLine, name, NCuts, SeqCuts, Names, & ispattern, reverse, & LWidth, RWidth, Replace, SQTYPE,FName, & count,prefix,suffix,directory) Return End ! of ShowFinds !*** CheckPattern *********************************************************** !* !* Determines if the Pattern stored in the EnzymeData structure and !* it's reverse is valid by calling PatInit. !* !****************************************************************************** Logical Function CheckPattern(Pattern) Implicit None Include 'GenInclude:EnzData.inc' Record /EnzymeData/ Pattern Real Sup, Perf Parameter ( Sup = .FALSE., Perf = .FALSE.) Integer Mis Parameter ( Mis = 0) Logical Status Character Temp(MAXSITELEN+1) Logical PatInit, PureGCG Integer SeqToCode, Str_Len Status = .TRUE. Call StrCopy(Temp, Pattern.Recsite) If ( PureGCG(Pattern.RecSite) .or. & (SeqToCode(Temp).eq.Str_Len(Pattern.RecSite)) ) Then Pattern.IsPattern = .FALSE. Else Pattern.IsPattern = .TRUE. Status = PatInit(Pattern.RecSite, SUP, PERF, MIS) If ( Status ) & Status = PatInit(Pattern.BotSite, SUP, PERF, MIS) End If CheckPattern = Status Return End ! of CheckPattern !*** ClearPattern *********************************************************** !* !* ReInitializes a pattern data structure for Find and Motifs. !* !****************************************************************************** Subroutine ClearPattern(Pattern) Implicit None Include 'GenInclude:EnzData.inc' Record /EnzymeData/ Pattern Pattern.RecSite(1) = Char(0) Pattern.TopSite(1) = Char(0) Pattern.BotSite(1) = Char(0) Pattern.Name(1) = Char(0) Pattern.NameLen = 0 Pattern.SiteLen = 0 Pattern.Use = .FALSE. Return End ! of ClearPattern !*** Summarize *************************************************************** !* !* Prints summary of session with RFindPatterns, and Calls !* Account. !* !****************************************************************************** Subroutine Summarize(TotCuts, TotLen, TotFiles) Implicit None Include 'GenInclude:Enzdata.inc' Include 'GenInclude:FindBlock.inc' Integer TotCuts, TotLen, TotFiles !* Exit the program with summaries and clean up !* copy pattern data file into output if 'append' is on the command line If ( Append .and. .not.(Terminal.or.Interactive) ) then If ( OutFile.gt.0 .and. PatFile.gt.0 ) then Call RewindF(PatFile) ! Call FWriteF(OutFile, '\n Here is the pattern data file: '// ! & '"%n"\n\n'//Char(0), PatFile) ! Call CopyFile(PatFile, OutFile) End if End if ! print a little summary in the output file ! If ( .not.(Terminal.or.Names) .and. OutFile.gt.0 ) ! & Call FWriteF(OutFile, ! & '$! Total finds: %10D\n'// ! & '$! Total length: %10D\n'// ! & '$! Total sequences: %10D\n'// ! & '$! CPU time: %10T\n\n'//Char(0), ! & TotCuts, TotLen, TotFiles) ! and on the screen Call WriteF('\n\n'// & ' Total finds: %10D\n'// & ' Total length: %10D\n'// & ' Total sequences: %10D\n'// & ' CPU time: %10T'//Char(0), & TotCuts, TotLen, TotFiles) !* remind the user of the output file's name. If ( .not.Terminal ) Call WriteF('\n\n'// & ' Output file: %n'//Char(0), OutFile) Call Account('RFindPatterns') Call WriteF('\n\n') End ! Of Summarize !*** ShowHits ************************************************************** !* !* This routine is used by pattern searching programs such as !* Motifs and Find to show the hits for this RecSite. !* !* Modified !* 9/3/91 mks Only show the first CUTINFOBUFFSIZE number of hits. !* !****************************************************************************** Subroutine ShowHits(OutFile, pat, SeqDoc, Seq, SeqLen, Check, & Cuts, RecSite, Name, NCuts, SeqCuts, Names, & IsPattern, Reverse, & LWidth,RWidth,Replace,SQTYPE,FName,count, & prefix,suffix,directory) Implicit None Include 'GenInclude:Sequence.inc' Include 'GenInclude:EnzData.inc' Integer OutFile Character SeqDoc(*), Seq(*) Integer SeqLen Integer Check Character Cuts(*) Character RecSite(*), Name(*) Integer pat, NCuts, SeqCuts Logical Names Logical IsPattern, Reverse Integer spos, pos, j, cts, PDFile Integer SubLength, RecSiteLen, HitLength, i, Calls/0/ Character TempName(1000), LowerFilter(0:255) Character SubSeq(MAXSITELEN+1), StrBegin(64) Character LocFName(256), OutLine(2*MAXSITELEN) Character StopLine(81) Logical Lower Record / CutInfo / Hit Logical ReOpenF Character ChToLower Integer Str_Len ! Integer LWidth,RWidth,SQTYPE,Count Integer Out2File Character Replace(*) Character Fname(*),Prefix(*),Suffix(*),Directory(*) Character OutFName(256) logical Do_Replace, Do_Fill Integer myvar,myvar2,mypos,begin_match,end_match Integer my_right_end, my_out_length, my_rep_length,cycles Record / Sequence / OutSq ! only write the seqname line for Ncuts = 0 If ( NCuts.eq.0 .or. Names ) Return ! make up a to-lower-case filter If ( Calls.eq.0 ) then Do i = 0, 255 LowerFilter(i) = ChToLower(Char(i)) End Do Calls = -1 End If ! show the pattern found If (Reverse) Then Call SWriteF(TempName, '%s /Rev', Name) ! Call FWriteF(OutFile,'\n%-20s %s\n', ! & TempName, RecSite ) Else ! Call FWriteF(OutFile,'\n%-20s %s\n', ! & Name, RecSite ) End If ! scan through cuts and find the patterns RecSiteLen = Str_Len(RecSite) SubLength = Str_Len(RecSite) cts = 0 pos = 1 ! If ( NCuts.gt.CUTINFOBUFFSIZE ) ! & Call FWriteF(OutFile,'\n **** This pattern has %d hits.\n'// ! & ' **** Only the first %d are displayed.\n\n', ! & NCuts, CUTINFOBUFFSIZE) Do While ( pos.le.SeqLen .and. cts.lt.CUTINFOBUFFSIZE ) If ( Cuts(pos).ne.Char(0) ) then cts = cts + 1 Lower = Cuts(pos).gt.Char(1) If ( IsPattern ) then Call PatCutInfo(Hit, cts) SubLength = Min(Hit.Span,MAXSITELEN) HitLength = Str_Len(Hit.SeqSite) Else HitLength = RecSiteLen SubLength = RecSiteLen End If ! my_rep_length = str_len(replace) if(count .eq. 0)then if(my_rep_length .ne. 0)then do_replace = .true. if(replace(1) .eq. '*')then do_fill = .true. else do_fill = .false. end if else do_replace = .false. end if end if ! ok, the patterns starts at POS, and is of size SubLength ! ! first fill the Lwidth. If it's off the left side, fill with "." ! outline(1) = char(0) if(REVERSE)then myvar = pos - Rwidth else myvar = pos - Lwidth end if mypos = 1 do while(myvar .lt. pos) if(myvar .ge. 1)then outline(mypos) = Seq(myvar) else outline(mypos) = '.' end if mypos = mypos + 1 myvar = myvar + 1 end do ! ! Now fill in the pattern. Truncate it if a replace string is ! specified that is shorter than the match and NOT a fill. ! ! ! pad it out if replace string is longer than sublength and reverse ! if(reverse .and. 1 (my_rep_length .gt. sublength) 1 .and. .not. do_fill)then do myvar2=1, my_rep_length - sublength outline(mypos) = ' ' mypos = mypos + 1 end do end if ! begin_match = mypos cycles = sublength do myvar2 = 1,sublength if(.not. do_replace .or. do_fill)then outline(mypos) = Seq(myvar) mypos = mypos + 1 else if(myvar2 .le. my_rep_length)then outline(mypos) = Seq(myvar) mypos = mypos + 1 end if myvar = myvar + 1 end do ! ! pad it out if replace string is longer than sublength and NOT reverse ! if(.not. reverse .and. 1 (my_rep_length .gt. sublength) .and. 1 .not. do_fill)then do myvar2=1, my_rep_length - sublength outline(mypos) = ' ' mypos = mypos + 1 end do end if end_match = mypos - 1 ! ! Poss ! ! ! now fill the right width. If it extends past the end of the ! sequence, fill with "." ! if(Reverse)then my_right_end = pos + sublength + Lwidth - 1 else my_right_end = pos + sublength + Rwidth - 1 end if do while(myvar .le. my_right_end) if (myvar .le. SeqLen)then outline(mypos) = Seq(myvar) else outline(mypos) = '.' end if mypos = mypos + 1 myvar = myvar + 1 end do outline(mypos) = char(0) ! ! find out how long it is ! my_out_length = str_len(outline) ! ! reverse if appropriate ! if(reverse)then call RevSeq(outline,1,my_out_length) end if ! ! Now take care of replace, if that is set ! if(do_replace)then if(reverse)begin_match = my_out_length - end_match + 1 mypos = begin_match if(do_fill)then do myvar = 1,sublength outline(mypos) = replace(2) mypos = mypos + 1 end do else do myvar = 1,my_rep_length if(replace(myvar) .ne. '+')then outline(mypos) = replace(myvar) else if(myvar .gt. sublength)stop 1 'RFindPatterns fatal error: "+" used outside of matched pattern' end if mypos = mypos + 1 end do end if end if ! ! now load up the outSQ structure ! Count = Count + 1 call SQClear(OutSQ) ! sequence call StrConcat(OutSQ.seq,outline) call SWriteF(outline,'%s%d',prefix,count) ! name = r##### call StrConcat(OutSQ.name,outline) ! length OutSQ.len = my_out_length ! type OutSQ.type = SQTYPE call SWriteF(OutFName,'%s%s%d.%s',directory,prefix,count,suffix) Call OpenFile(Out2File, OutFName, 'w') ! ! put out some header information ! Call FWriteF(Out2File, & 'RFINDPATTERNS on: %s \n',fname) if(reverse)then outline(1) = char(0) call StrConcat(outline,Recsite) call RevSeq(outline,1,str_len(Recsite)) Call FWriteF(Out2File,'Original file info: %s\n',seqdoc) Call FWriteF(Out2File,'Matching pattern: %s\n',Outline) else Call FWriteF(Out2File,'Original file info: %s\n',seqdoc) Call FWriteF(Out2File,'Matching pattern: %s\n',Recsite) end if Call FWriteF(Out2File, & 'Pattern location: %d to %d\n',pos,pos + sublength - 1) Call FWriteF(Out2File, & 'Lwidth: %d\n',LWidth) Call FWriteF(Out2File, & 'Rwidth: %d\n',RWidth) if(do_replace)Call FWriteF(Out2File, & 'Replaced matching pattern with: %s\n',Replace) if(reverse)Call FWriteF(Out2File, & 'Match and extraction from the REVERSE strand.\n') ! Character Number(MAXACCNUM+1) ! accession number for sequence ! Character Doc(MAXDOCSTRING+1) ! sequence definition ! Character Seq(MAXSEQLEN+1) ! current sequence Call FWriteF(Out2File,'\n') call SQWrite(Out2File,OutSQ) call CloseF(Out2File) End If pos = pos + 1 End Do Return End ! Of ShowHits