Skip to content

Commit 74fce88

Browse files
authored
Merge pull request #262 from LadnerLab/prerelease
Make file and update for latest version
2 parents e827ea4 + 4ed58b2 commit 74fce88

19 files changed

Lines changed: 418 additions & 30 deletions

CMakeLists.txt

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,24 @@ list( APPEND PepSIRF_LINK_LIBS
5252
)
5353

5454
if(OpenMP_FOUND)
55-
message( "OpenMP enabled" )
56-
list( APPEND PepSIRF_LINK_LIBS OpenMP::OpenMP_CXX )
57-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xpreprocessor -fopenmp")
58-
add_definitions( -DENABLE_OPENMP )
55+
message("OpenMP enabled")
56+
57+
if(APPLE)
58+
# Get libomp filepath
59+
execute_process(COMMAND brew --prefix libomp OUTPUT_VARIABLE BREW_PREFIX OUTPUT_STRIP_TRAILING_WHITESPACE)
60+
61+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xpreprocessor -fopenmp -I${BREW_PREFIX}/include")
62+
list(APPEND PepSIRF_LINK_LIBS "${BREW_PREFIX}/lib/libomp.dylib")
63+
else()
64+
list(APPEND PepSIRF_LINK_LIBS OpenMP::OpenMP_CXX)
65+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xpreprocessor -fopenmp")
66+
endif()
67+
68+
# Define OpenMP macro
69+
add_definitions(-DENABLE_OPENMP)
5970

6071
else()
61-
message( "WARNING: OpenMP not found, parallelism disabled." )
72+
message("WARNING: OpenMP not found, parallelism disabled.")
6273
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas -Wno-unused-value")
6374
endif()
6475

docs/5-changelog.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,14 @@ permalink: /changelog/
1010

1111
## 1.7.0 | 2024-10-3
1212

13-
<strong>Docker: added new feature (Issue #254).</strong> Added the ability to run PepSIRF as a Docker image and added a page for instructions.
13+
### Bug Fixes:
1414

1515
<strong>CMakeLists: bug fix (Issue #197).</strong> Resolved CMake not locating OpenMP on macOS. Tutorial for fix added to installation page.
1616

17+
### New Features:
18+
19+
<strong>Docker: added new feature (Issue #254).</strong> Added the ability to run PepSIRF as a Docker image and added a page for instructions.
20+
1721
<strong>Subjoin: added new feature (Issue #236).</strong> Added functionality to the "-i" option in Subjoin to accept a regex pattern instead of a filename which contains sample/peptide names. The sample/peptide names used from the score matrix file will be filtered by whether they contain the regex pattern.
1822

1923
<strong>Demux: added new feature (Issue #234).</strong> Added "--unmapped-reads-output" option to Demux, which writes all reads that have not been mapped to a sample/peptide to the specified filename.

extensions/e_k_bias.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/usr/bin/env python3
2+
3+
import pandas as pd
4+
import argparse
5+
import fastatools as ft
6+
import numpy as np
7+
8+
def main():
    """Command-line entry point.

    Parses the input fasta file and output file name, computes the proportion
    of E and K residues for every peptide, ranks those proportions as
    percentiles, and writes one tab-separated row per peptide with the columns
    CodeName, e_k_Prop (rounded to 3 places) and e_k_Percentile (rounded to 2).
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # The value is passed directly to the fasta reader, so it is a file, not a directory.
    parser.add_argument('-i', '--fasta-file', help='Fasta file with enriched peptides for input', required=True)
    parser.add_argument('-o', '--output-file', default="e_k_bias_out.tsv", help='Name of .tsv to output file with AA bias data')

    args = parser.parse_args()

    # get proportion of e's and k's for each peptide
    e_k_props = get_e_k_props(args.fasta_file)

    # get percentiles
    percentile_dict = get_percentiles(e_k_props)

    # create output rows: (name, proportion, percentile)
    out_data = [(name, round(prop, 3), round(percentile_dict[name], 2)) for name, prop in e_k_props.items()]

    pd.DataFrame(out_data, columns=["CodeName", "e_k_Prop", "e_k_Percentile"]).to_csv(args.output_file, index=False, sep='\t')
27+
28+
29+
def get_e_k_props(fasta_file)->dict:
    """Compute the proportion of E and K residues for each sequence in a fasta file.

    The file is read with ``ft.read_fasta_dict``; residues are matched
    case-insensitively against 'e' and 'k'.

    Args:
        fasta_file: Value handed to ``ft.read_fasta_dict``.

    Returns:
        dict mapping each sequence name to
        (number of E/K residues) / (sequence length).
        A sequence of length zero maps to 0.0.
    """
    e_k_props = dict()

    # get sequences for the peptide file
    fasta_dict = ft.read_fasta_dict(fasta_file)

    for name, seq in fasta_dict.items():
        # An empty record has no residues; report 0.0 instead of dividing by zero.
        if len(seq) == 0:
            e_k_props[name] = 0.0
            continue

        # count residues that are e or k, ignoring case
        e_k_count = sum(1 for aa in seq if aa.lower() in ('e', 'k'))

        e_k_props[name] = e_k_count / len(seq)

    return e_k_props
49+
50+
def get_percentiles(e_k_props)->dict:
    """Rank each peptide's E/K proportion on a 0-100 scale.

    The distinct proportion values are sorted and spread evenly from 0 to 100;
    every peptide receives the rank of its own value, so peptides with equal
    proportions share the same percentile.

    Args:
        e_k_props: dict mapping peptide name to its E/K proportion.

    Returns:
        dict mapping peptide name to its percentile rank.
    """
    peptide_names = list(e_k_props)
    prop_values = np.array([e_k_props[peptide] for peptide in peptide_names])

    # distinct_props is sorted ascending; rank_of_value[i] is the position of
    # prop_values[i] within distinct_props
    distinct_props, rank_of_value = np.unique(prop_values, return_inverse=True)

    # evenly spaced percentile for each distinct proportion
    rank_scale = np.linspace(0, 100, len(distinct_props))

    return {peptide: rank_scale[rank] for peptide, rank in zip(peptide_names, rank_of_value)}
66+
67+
68+
if __name__ == "__main__":
69+
main()

extensions/findEpitopes.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ def read_check_align_file(directory):
223223

224224
# Construct the full file path
225225
filepath = os.path.join(directory, 'checkAlignLength.out')
226+
226227
#filepath = os.path.join('checkAlignLength.out')
227228

228229
# Read the file content
@@ -253,6 +254,7 @@ def process_files_probes(probes_dict, directory_path):
253254
aligned_probes_file = filename.replace('.fasta', '_probesAligned.txt')
254255
aligned_probes_path = os.path.join(directory_path, aligned_probes_file)
255256

257+
256258
result[filename], file_2_pep_pos_dict[filename] = process_file_probes(data, aligned_probes_path)
257259

258260
return result, file_2_pep_pos_dict

include/modules/core/matrix.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ class matrix
8585
bool end()
8686
{
8787
return current_idx >=
88-
this->matr.access_to_1d( this->matr.N, this->matr.M );
88+
this->matr->access_to_1d( this->matr->N, this->matr->M );
8989
}
9090

9191
/**
@@ -126,7 +126,7 @@ class matrix
126126
**/
127127
mutable_iterator& next_row()
128128
{
129-
this += this->matr.M;
129+
this += this->matr->M;
130130
return *this;
131131
}
132132

@@ -139,7 +139,7 @@ class matrix
139139
**/
140140
mutable_iterator &next_col()
141141
{
142-
this += this->matr.N;
142+
this += this->matr->N;
143143
return *this;
144144
}
145145

@@ -240,7 +240,7 @@ class matrix
240240
bool end()
241241
{
242242
return current_idx >=
243-
this->matr.access_to_1d( this->matr.N, this->matr.M );
243+
this->matr->access_to_1d( this->matr->N, this->matr->M );
244244
}
245245

246246
/**
@@ -281,7 +281,7 @@ class matrix
281281
**/
282282
iterator& next_row()
283283
{
284-
this += this->matr.M;
284+
this += this->matr->M;
285285
return *this;
286286
}
287287

@@ -294,7 +294,7 @@ class matrix
294294
**/
295295
iterator &next_col()
296296
{
297-
this += this->matr.N;
297+
this += this->matr->N;
298298
return *this;
299299
}
300300

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#ifndef PEPSIRF_VERSION_HH_INCLUDED
22
#define PEPSIRF_VERSION_HH_INCLUDED
33

4-
#define PEPSIRF_VERSION "1.6.0"
4+
#define PEPSIRF_VERSION "1.7.0"
55

66
#endif

include/modules/core/sequence.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,14 @@ class sequence
7272
**/
7373
bool operator==( const sequence& s ) const;
7474

75+
/**
76+
* Less than operator override.
77+
* For two sequences a and b we say a < b iff
78+
* a.seq < b.seq.
79+
* @param s Sequence to compare against.
80+
**/
81+
bool operator<( const sequence& s ) const;
82+
7583

7684
/**
7785
* Get the length of the 'seq' member of this class.

include/modules/demux/module_demux.h

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,22 @@ class module_demux : public module
109109
std::vector<sequence> &lib_seqs
110110
);
111111

112+
/**
113+
* Outputs truncated sequence info for unique sequences, non-unique
114+
* sequences, and the new fasta-formatted file
115+
* @param seq_length Sequence length specified with the "--seq" option
116+
* @param lib_seqs Value of library sequences received from file
117+
* specified by the "--library"
118+
* @param library_fname Library file name, specified by "--library"
119+
* @param trunc_info_outdir Specified output directory with the
120+
* "--trunc_info_outdir" option
121+
**/
122+
void output_trunc_info(
123+
std::size_t seq_length,
124+
std::vector<sequence> lib_seqs,
125+
std::string library_fname,
126+
std::string trunc_info_outdir
127+
);
112128

113129
/**
114130
* Method to zero a vector of size_t elements.
@@ -380,11 +396,32 @@ class module_demux : public module
380396
* Creates a single output fastq file containing all of the reads that have not been mapped to a sample/peptide
381397
* @param filename file to output to
382398
* @param samp_map fastaq output map
383-
* @reads_dup vector of all reads
399+
* @param reads_dup vector of all reads
384400
**/
385401
void create_unmapped_reads_file( std::string filename,
386402
std::map<std::string, std::vector<fastq_sequence>> samp_map, std::vector<fastq_sequence> reads_dup );
387403

404+
/**
 * Outputs a vector to a file with a delimiter between entries.
 * The delimiter is written only between consecutive values, never after
 * the last one. An empty vector produces an empty file.
 * @param vector vector to be outputted, make sure the type of its values
 *        can use the << operator for output
 * @param output_file file to output to
 * @param delimiter delimiter to output between values
 **/
template <typename T>
void simple_vector_out(const std::vector<T>& vector, const std::string& output_file, const std::string& delimiter)
{
    // The stream is flushed and closed when it goes out of scope.
    std::ofstream outfile(output_file, std::ios::out);
    for (std::size_t i = 0; i < vector.size(); ++i)
    {
        // Separator goes before every element except the first.
        if (i > 0)
        {
            outfile << delimiter;
        }
        outfile << vector[i];
    }
}
424+
388425
};
389426

390427
#endif /* MODULE_DEMUX_HH_INCLUDED */

include/modules/demux/options_demux.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ class options_demux: public options
3636
int num_indexes;
3737
std::string replicate_info_fname;
3838
std::string unmapped_reads_fname;
39-
4039
bool translation_aggregation;
4140
std::string fastq_out;
41+
std::string trunc_info_outdir;
4242

4343
/**
4444
* The number of fastq records to read per loop. A higher value here will result in higher memory usage by the program.

src/modules/core/sequence.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,8 @@ bool sequence::operator==( const sequence& s ) const
5353
{
5454
return !s.seq.compare( seq );
5555
}
56+
57+
bool sequence::operator<( const sequence& s ) const
58+
{
59+
return seq < s.seq;
60+
}

0 commit comments

Comments
 (0)