首页 > 代码库 > fasta矩阵组合脚本

fasta矩阵组合脚本

又是给女朋友做的……没有需求就没有动力这真的是一个合格的程序员么喂。

因为是在Windows下用,不能用bash也不能写Python,只好写成VBS。

第一次用VBS,这货比起Python真是难用得要命,微软太不争气了。不过也算是学了一门新技能吧。


其实这脚本一点也不健壮,文件数量过多时会堆栈溢出,文件名太长会报错,文件太大倒是没测过,想必也有问题。

她已经用完了,咱也没动力改了。有需求请自取。


‘Author by alex_lei@163.com
‘用于对当前路径下多个.fasta或.fas文件按矩阵进行组合,结果存放在result目录中
‘示例:当前路径下有A.fasta、B.fasta和C.fasta三个文件
‘      运行脚本后,result目录中将生成A+B.fas、B+C.fas、A+C.fas、A+B+C.fas四个文件
‘      其中,A+B.fas的每一个矩阵都由A.fasta和B.fasta的对应矩阵拼合而成,以此类推
‘      结果矩阵的名称将采用第一个源矩阵的名称
‘限制:请保证每个文件中矩阵的顺序相同且格式正确(每个矩阵以">矩阵名"开头)。

‘主函数入口
Dim fso
Set fso = CreateObject("Scripting.FileSystemObject")
fso.CreateFolder(".\result")
ForEachPair GetFileName


‘获取文件扩展名
Private Function GetFileExt(ByVal sFileName)
    Dim P
    For P = Len(sFileName) To 1 Step -1
        If InStr(".", Mid(sFileName, P, 1)) Then 
            Exit For
        End If
    Next
    GetFileExt = Right(sFileName, Len(sFileName) - P)
End Function

‘获取除了路径和扩展名的主文件名
Private Function GetFileMain(ByVal sFilename)
    Dim P, Q
    For P = Len(sFileName) To 1 Step -1
        If InStr(".", Mid(sFileName, P, 1)) Then 
            Exit For
        End If
    Next
    
    For Q = P To 1 Step -1
        If InStr("\", Mid(sFileName, Q, 1)) Then 
            Exit For
        End If
    Next
    GetFileMain = Mid(sFileName, Q+1, P-Q-1)
End Function

‘获取所有.fasta文件的完整文件名
Private Function GetFileName
    Dim fso, path, filelist, fastafiles()
    Dim i, f, arrsize
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set path = fso.GetFolder(".")
    Set filelist = path.Files
    i = 0
    arrsize = 1
    Redim fastafiles(arrsize)
    For each f in filelist
        If GetFileExt(f) = "fasta" Or GetFileExt(f) = "fas" Then
            If i = arrsize Then
                arrsize = arrsize + 1
                Redim Preserve fastafiles(arrsize - 1)
            End If
            fastafiles(i) = f
            i = i + 1
        End If
    Next
    If i = 0 Then
        MsgBox("没有找到.fasta文件,请检查本文件所在路径")
        WScript.Quit
    End If
    GetFileName = fastafiles
End Function

‘将一个文件拆分成若干个sample
Private Function SplitSample(ByVal filename)
    Dim fso, f1, content, array, newarray, i
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set f1 = fso.OpenTextFile(filename, 1)
    content = f1.ReadAll
    f1.Close
    array = Split(content, ">", -1, vbTextCompare)
    If array(0) = "" Then
        Redim newarray(UBound(array) - 1)
        For i = 1 To UBound(array)
            newarray(i-1) = array(i)
        Next
    Else
        newarray = array
    End If
    SplitSample = newarray
End Function

‘将两个sample合并为一个,只保留第一个sample的标题
Private Function JoinSample(ByVal sample1, ByVal sample2)
    Dim lnary1, lnary2, tarary, i, j
    
    lnary1 = Split(sample1, vbNewLine, -1, vbTextCompare)
    lnary2 = Split(sample2, vbNewLine, -1, vbTextCompare)
    Redim tarary(UBound(lnary1) + UBound(lnary2) + 1)
    For i = 0 To UBound(lnary1)
        tarary(i) = lnary1(i)
    Next
    For j = i + 1 To i + UBound(lnary2)
        ‘不合并第二个sample的第一行,因为也是标题
        tarary(j) = lnary2(j - i)
    Next
    tarary(0) = ">" & tarary(0) & vbNewLine
    JoinSample = Join(tarary, "")
End Function

‘将两个文件合并到一个文件中
Private Function JoinFile(ByVal file1, ByVal file2)
    Dim tarary, ary1, ary2, tarfile, fso, fp, i, maxelem, sample
    ‘MsgBox file1 & "+" & file2
    tarfile = ".\result\" & GetFileMain(file1) & "+" & GetFileMain(file2) & ".fas"
    Set fso = CreateObject("Scripting.FileSystemObject")
    If fso.FileExists(tarfile) Then
        JoinFile = tarfile
        Exit Function
    End If
    ary1 = SplitSample(file1)
    ary2 = SplitSample(file2)
    If UBound(ary1) <> UBound(ary2) Then
        MsgBox(file1 & "的样本数" & UBound(ary1) & "与" & file2 & "的样本数" & UBound(ary2) & "不相等")
        WScript.Quit
    End If
    maxelem = UBound(ary1)
    Redim tarary(maxelem)
    For i = 0 To maxelem
        tarary(i) = JoinSample(ary1(i), ary2(i))
    Next
    
    ‘写入文件
    Set fp = fso.CreateTextFile(tarfile, True)
    For each sample in tarary
        ‘因为sample末尾没有换行,此处输出换行
        fp.WriteLine(sample)
    Next
    fp.Close
    JoinFile = tarfile
End Function

‘对文件进行排列组合
Private Function ForEachPair(ByVal list)
    Dim i, j, k, newlist, combfile
    If UBound(list) = 1 Then
        ‘只有两个元素则结束递归
        ForEachPair = JoinFile(list(0), list(1))
    Else
        Redim newlist(UBound(list) - 1)
        For i = 0 To UBound(list)
            k = 0
            For j = 0 To UBound(list)
                If i <> j Then
                    newlist(k) = list(j)
                    ‘MsgBox("list(" & j & ") " & list(j) & "newlist(" & k & ") " & newlist(k))
                    k = k + 1
                End If
            Next
            ‘MsgBox(Join(newlist, " | "))
            If i = 0 Then
                combfile = ForEachPair(newlist)
            Else
                ForEachPair newlist
            End If
        Next
        ‘因为不考虑排列,只需用第一个和后面的组合在一起就可以
        ForEachPair = JoinFile(list(0), combfile)
    End If
End Function