From ad1c7dd44351c178f7b79512dba3f369d46d87cb Mon Sep 17 00:00:00 2001 From: trevored-py Date: Wed, 13 May 2026 09:27:05 -0700 Subject: [PATCH 1/2] Add trevored-py to the README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 44e0723..4f9f00f 100644 --- a/README.md +++ b/README.md @@ -91,3 +91,4 @@ scripts/ ``` Each round's `data/` directory is generated locally and gitignored. +This is trevored-py's PR From 960f8da6c094826a756011ec3a1aa6a068e096e5 Mon Sep 17 00:00:00 2001 From: trevored-py Date: Sat, 16 May 2026 15:28:44 -0700 Subject: [PATCH 2/2] bytes.find() solution --- rounds/3_dna/solution.py | 52 ++++++++++++++++++++++++--- rounds/3_dna/target_b.txt | 1 + rounds/3_dna/working_solution.py | 61 ++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 rounds/3_dna/target_b.txt create mode 100644 rounds/3_dna/working_solution.py diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py index 8b917da..07bf051 100644 --- a/rounds/3_dna/solution.py +++ b/rounds/3_dna/solution.py @@ -5,13 +5,57 @@ own faster implementation. """ -from .baseline import find_matches as _baseline - def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]: """Find every FASTA record whose sequence contains ``pattern``. Returns ``[(record_id, [positions...]), ...]`` in file order. """ - # TODO: remove this delegation and write your own implementation here. - return _baseline(fasta_path, pattern) + + results = [] + + current_id = None + chunks = [] + + with open(fasta_path, "rb") as f: + for line in f: + line = line.rstrip(b"\n") + + if line.startswith(b">"): + if current_id is not None: + seq = b"".join(chunks) + positions = find_all(seq, pattern) + + if positions: + results.append((current_id, positions)) + + current_id = line[1:].decode("ascii") + chunks = [] + + else: + chunks.append(line) + + if current_id is not None: + seq = b"".join(chunks) + positions = find_all(seq, pattern) + + if positions: + results.append((current_id, positions)) + + return results + + +def find_all(seq: bytes, pattern: bytes) -> list[int]: + positions = [] + start = 0 + + while True: + pos = seq.find(pattern, start) + + if pos == -1: + break + + positions.append(pos) + start = pos + 1 + + return positions \ No newline at end of file diff --git a/rounds/3_dna/target_b.txt b/rounds/3_dna/target_b.txt new file mode 100644 index 0000000..3be2325 --- /dev/null +++ b/rounds/3_dna/target_b.txt @@ -0,0 +1 @@ +target = bytearray([65, 71, 84, 67, 67, 71, 84, 65]) diff --git a/rounds/3_dna/working_solution.py b/rounds/3_dna/working_solution.py new file mode 100644 index 0000000..07bf051 --- /dev/null +++ b/rounds/3_dna/working_solution.py @@ -0,0 +1,61 @@ +"""Your Round 3 solution — DNA sequence matcher. + +**Edit this file.** It currently delegates to ``baseline.py`` so everything +passes out of the box. Replace the body of ``find_matches`` with your +own faster implementation. +""" + + +def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]: + """Find every FASTA record whose sequence contains ``pattern``. + + Returns ``[(record_id, [positions...]), ...]`` in file order. + """ + + results = [] + + current_id = None + chunks = [] + + with open(fasta_path, "rb") as f: + for line in f: + line = line.rstrip(b"\n") + + if line.startswith(b">"): + if current_id is not None: + seq = b"".join(chunks) + positions = find_all(seq, pattern) + + if positions: + results.append((current_id, positions)) + + current_id = line[1:].decode("ascii") + chunks = [] + + else: + chunks.append(line) + + if current_id is not None: + seq = b"".join(chunks) + positions = find_all(seq, pattern) + + if positions: + results.append((current_id, positions)) + + return results + + +def find_all(seq: bytes, pattern: bytes) -> list[int]: + positions = [] + start = 0 + + while True: + pos = seq.find(pattern, start) + + if pos == -1: + break + + positions.append(pos) + start = pos + 1 + + return positions \ No newline at end of file