def run(self):
    """Read this worker's input, then append its records to per-key temp files.

    ``self.reader()`` is called first (presumably this is what fills
    ``self.outData`` -- confirm against the class).  Then, for every key in
    ``self.outData``, the key is put on ``self.queue`` and one line per
    record is appended to the file ``path/<key>``.  Each line has the form
    ``item[0] key item[1] item[2] ...`` with single-space separators.

    ``path`` is a module-level name and must be defined before this runs.
    """
    print("Process: " + self.name+": reading file...")
    self.reader()
    print("Process: " + self.name+": begin to write temporary data to file...")
    for key, records in self.outData.items():
        # Report the key through the queue before its records are written.
        self.queue.put(key, block=True, timeout=None)
        with open(path+"/"+key, 'a') as File:
            # Take an exclusive advisory lock before appending; it is
            # released when the file is closed on leaving the ``with`` block.
            fcntl.flock(File.fileno(), fcntl.LOCK_EX)
            File.writelines(
                item[0]+" "+key+" "+" ".join(item[1:])+"\n"
                for item in records)
    print("Process: " + self.name+": completed write...")
3.排序进程
排序进程要处理的文件名,由主进程从队列中获取。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
def sortFile(self):
    """Group the lines of ``path/<self.fileName>`` by field 5 and write them sorted.

    Every input line is split on single spaces.  Field 5 is the grouping
    key; fields 0, 2, 3, 4, 6 and 7 are stored under that key in
    ``self.outData``.  The groups are then written to
    ``path/<self.fileName>_sorted`` in ascending key order (keys are
    strings, so this is a plain string sort); lines that share a key keep
    their input order.  Each output line is
    ``field0 self.fileName field2 field3 field4 key field6 field7``.

    ``path`` is a module-level name and must be defined before this runs.
    A line with fewer than eight space-separated fields raises IndexError.
    """
    with open(path+"/"+self.fileName, 'r') as File:
        # Original note: every line in this file belongs to the same
        # chromosome, i.e. pairs such as Chr1-Chr2, Chr1-Chr3.
        for line in File:
            line = line.split(" ")
            # Field 1 is not stored; self.fileName is written in its place.
            record = [line[0], line[2], line[3], line[4], line[6], line[7]]
            self.outData.setdefault(line[5], []).append(record)
    with open(path+"/"+self.fileName+"_sorted", 'w') as File:
        for key in sorted(self.outData):
            for item in self.outData[key]:
                # No "\n" is appended: item[-1] is the last field of the
                # input line and still carries that line's own newline.
                File.write(item[0]+" "+self.fileName+" " +
                           " ".join(item[1:4])+" "+key+" "+item[-2]+" "+item[-1])
# Driver: start the reader processes, collect the keys they report through
# the queue, then start one sort process per distinct key.
from queue import Empty  # raised by Queue.get() when the timeout expires

path = 'tmp'+str(int(time.time()))
mkdir(path)
workQueue = Queue()  # holds the data reported by the child processes
read_jobs = []
sort_jobs = []
chrosomes = []
pos_list = PartitionFile(fileName, ProcessNum).partion()  # all file-pointer positions
for i in range(ProcessNum):
    position = pos_list[i]
    myprocess = readProcess(
        str(i), fileName, workQueue, position[0], position[1], processFunction)
    myprocess.start()
    read_jobs.append(myprocess)
for i in read_jobs:
    i.join()
# NOTE(review): the queue is drained only after every reader has been joined.
# If Queue is multiprocessing.Queue, a child that has queued a lot of data
# cannot exit until it is consumed, so join() could block -- confirm that the
# number of queued items stays small.
while True:
    try:
        # Fetch data reported by the child processes.
        chrosomes.append(workQueue.get(block=True, timeout=1))
    except Empty:
        # Nothing arrived within the timeout: treat the queue as drained.
        break
for i in set(chrosomes):
    myprocess = sortProcess(str(i), i)  # sort process
    myprocess.start()
    sort_jobs.append(myprocess)
for i in sort_jobs:
    i.join()
5.性能测试
单进程
1 2 3 4 5 6 7 8 9 10
Process: 0: reading file... Process: 0: begin to write temporary data to file... Process: 0: completed write... sorting chrosome: Gbar_A01... chrosome: Gbar_A01 ok... merge chrosomes to a single file... completed! there are some temporary file in directory: <./tmp1593916264> if you can remove it by yourself! Cost Time is 46.96
四个进程
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
Process: 1: reading file... Process: 0: reading file... Process: 3: reading file... Process: 2: reading file... Process: 3: begin to write temporary data to file... Process: 1: begin to write temporary data to file... Process: 3: completed write... Process: 2: begin to write temporary data to file... Process: 0: begin to write temporary data to file... Process: 1: completed write... Process: 2: completed write... Process: 0: completed write... sorting chrosome: Gbar_A01... chrosome: Gbar_A01 ok... merge chrosomes to a single file... completed! there are some temporary file in directory: <./tmp1593916319> if you can remove it by yourself! Cost Time is 18.70