WIP

pydna-group · Oct 3, 2024 · e87d85a · e87d85a
1 parent 7b60609
commit e87d85a
Show file tree

Hide file tree

Showing 3 changed files with 320 additions and 1 deletion.
diff --git a/readme_example.ipynb b/readme_example.ipynb
@@ -0,0 +1,315 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Dseqrecord(-60)\n",
+       "\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0m\n",
+       "TACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTA"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from pydna.dseqrecord import Dseqrecord\n",
+    "# Let's create a DNA sequence record, and add a feature to it\n",
+    "dsr = Dseqrecord(\"ATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\")\n",
+    "dsr.add_feature(x=0, y=60,type=\"gene\", label=\"my_gene\") # We add a feature to highlight the sequence as a gene\n",
+    "dsr.figure()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "LOCUS       name                      60 bp    DNA     linear   UNK 01-JAN-1980\n",
+      "DEFINITION  description.\n",
+      "ACCESSION   id\n",
+      "VERSION     id\n",
+      "KEYWORDS    .\n",
+      "SOURCE      .\n",
+      "  ORGANISM  .\n",
+      "            .\n",
+      "FEATURES             Location/Qualifiers\n",
+      "     misc            1..60\n",
+      "                     /type=\"gene\"\n",
+      "                     /label=\"my_gene\"\n",
+      "ORIGIN\n",
+      "        1 atgcaaacag taatgatgga tgacattcaa agcactgatt ctattgctga aaaagataat\n",
+      "//\n"
+     ]
+    }
+   ],
+   "source": [
+    "# This is how it would look as a genbank file\n",
+    "print(dsr.format(\"genbank\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "forward primer: ATGCAAACAGTAATGATGGA\n",
+      "reverse primer: ATTATCTTTTTCAGCAATAGAATCA\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "5ATGCAAACAGTAATGATGGA...TGATTCTATTGCTGAAAAAGATAAT3\n",
+       "                        |||||||||||||||||||||||||\n",
+       "                       3ACTAAGATAACGACTTTTTCTATTA5\n",
+       "5ATGCAAACAGTAATGATGGA3\n",
+       " ||||||||||||||||||||\n",
+       "3TACGTTTGTCATTACTACCT...ACTAAGATAACGACTTTTTCTATTA5"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Now let's design primers to amplify it\n",
+    "from pydna.design import primer_design\n",
+    "# limit is the minimum length of the primer, target_tm is the desired melting temperature of the primer\n",
+    "amplicon = primer_design(dsr, limit=13, target_tm=55)\n",
+    "# Let's print the primers, and a figure that shows where they align with the template sequence\n",
+    "print(\"forward primer:\", amplicon.forward_primer.seq)\n",
+    "print(\"reverse primer:\", amplicon.reverse_primer.seq)\n",
+    "amplicon.figure()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "          5ATGCAAACAGTAATGATGGA...TGATTCTATTGCTGAAAAAGATAAT3\n",
+       "                                  |||||||||||||||||||||||||\n",
+       "                                 3ACTAAGATAACGACTTTTTCTATTACCTAGGtttt5\n",
+       "5ccccGGATCCATGCAAACAGTAATGATGGA3\n",
+       "           ||||||||||||||||||||\n",
+       "          3TACGTTTGTCATTACTACCT...ACTAAGATAACGACTTTTTCTATTA5"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Let's say we don't want to just amplify it, but we want to add restriction sites to it!\n",
+    "\n",
+    "from pydna.amplify import pcr\n",
+    "# We add the restriction sites to the primers\n",
+    "forward_primer = \"ccccGGATCC\" + amplicon.forward_primer\n",
+    "reverse_primer = \"ttttGGATCC\" + amplicon.reverse_primer\n",
+    "\n",
+    "# We do the PCR\n",
+    "pcr_product = pcr(forward_primer, reverse_primer, dsr)\n",
+    "# The PCR product is of class `Amplicon`, a subclass of `Dseqrecord`.\n",
+    "# Calling `figure()` on an `Amplicon` shows where the primers anneal to the template.\n",
+    "pcr_product.figure()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "LOCUS       m_y___g_e_n_e             80 bp    DNA     linear   UNK 01-JAN-1980\n",
+      "DEFINITION  pcr_product_f60 id_r60 id.\n",
+      "ACCESSION   m_y___g_e_n_e\n",
+      "VERSION     m_y___g_e_n_e\n",
+      "KEYWORDS    .\n",
+      "SOURCE      .\n",
+      "  ORGANISM  .\n",
+      "            .\n",
+      "FEATURES             Location/Qualifiers\n",
+      "     misc            11..70\n",
+      "                     /type=\"gene\"\n",
+      "                     /label=\"my_gene\"\n",
+      "     primer_bind     11..30\n",
+      "                     /label=\"f60\"\n",
+      "                     /PCR_conditions=\"primer\n",
+      "                     sequence:ccccGGATCCATGCAAACAGTAATGATGGA\"\n",
+      "                     /ApEinfo_fwdcolor=\"#baffa3\"\n",
+      "                     /ApEinfo_revcolor=\"#ffbaba\"\n",
+      "     primer_bind     complement(46..70)\n",
+      "                     /label=\"r60\"\n",
+      "                     /PCR_conditions=\"primer\n",
+      "                     sequence:ttttGGATCCATTATCTTTTTCAGCAATAGAATCA\"\n",
+      "                     /ApEinfo_fwdcolor=\"#baffa3\"\n",
+      "                     /ApEinfo_revcolor=\"#ffbaba\"\n",
+      "ORIGIN\n",
+      "        1 ccccggatcc atgcaaacag taatgatgga tgacattcaa agcactgatt ctattgctga\n",
+      "       61 aaaagataat ggatccaaaa\n",
+      "//\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Dseqrecord(-80)\n",
+       "ccccGGATCC\u001b[48;5;11mATGCAAACAGTAATGATGGATGACATTCAAAGCACTGATTCTATTGCTGAAAAAGATAAT\u001b[0mGGATCCaaaa\n",
+       "ggggCCTAGGTACGTTTGTCATTACTACCTACTGTAAGTTTCGTGACTAAGATAACGACTTTTTCTATTACCTAGGtttt"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# If we want to see the sequence more clearly, we can turn it into a `Dseqrecord`\n",
+    "pcr_product = Dseqrecord(pcr_product)\n",
+    "\n",
+    "print(pcr_product.format(\"genbank\"))\n",
+    "\n",
+    "pcr_product.figure()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "SyntaxError",
+     "evalue": "invalid decimal literal (861700443.py, line 1)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;36m  Cell \u001b[0;32mIn[6], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m    5atg...taa3 |||\u001b[0m\n\u001b[0m    ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid decimal literal\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "5atg...taa3 |||\n",
+    "5ccccGGATCCatg3\n",
+    "3attCCTAGGtttt5\n",
+    "|||\n",
+    "3tac...att5\n",
+    "In [5]: Dseqrecord(pcr_product).figure()\n",
+    "Out[5]: Dseqrecord(-29)\n",
+    "ccccGGATCCatgccctaaGGATCCaaaa\n",
+    "ggggCCTAGGtacgggattCCTAGGtttt\n",
+    "In [6]: from Bio.Restriction import BamHI # cuts GGATCC\n",
+    "a, b, c = pcr_product.cut(BamHI)\n",
+    "print(a.figure())\n",
+    "print()\n",
+    "print (b.figure())\n",
+    "print()\n",
+    "print(c.figure())\n",
+    "Dseqrecord(-9)\n",
+    "ccccG\n",
+    "ggggCCTAG\n",
+    "Dseqrecord(-19)\n",
+    "GATCCatgccctaaG\n",
+    "    GtacgggattCCTAG\n",
+    "Dseqrecord(-9)\n",
+    "GATCCaaaa\n",
+    "    Gtttt\n",
+    "In [7]: vector = Dseqrecord(\"aatgtttttccctCCCGGGcaaaatAGATCTtgctatgcatcatcgatct\",\n",
+    "circular=True, name=\"vect\")\n",
+    "In [8]: vector.figure()\n",
+    "Out[8]: Dseqrecord(o50)\n",
+    "aatgtttttccctCCCGGGcaaaatAGATCTtgctatgcatcatcgatct\n",
+    "ttacaaaaagggaGGGCCCgttttaTCTAGAacgatacgtagtagctaga\n",
+    "In [9]: from Bio.Restriction import BglII # cuts AGATCT\n",
+    "linear_vector_bgl = vector.linearize(BglII)\n",
+    "rec_vector = (linear_vector_bgl + b).looped().synced(vector)\n",
+    "rec_vector.figure()\n",
+    "Out[9]: Dseqrecord(o65)\n",
+    "aatgtttttccctCCCGGGcaaaatAGATCCatgccctaaGGATCTtgctatgcatcatcgatct\n",
+    "ttacaaaaagggaGGGCCCgttttaTCTAGGtacgggattCCTAGAacgatacgtagtagctaga\n",
+    "In [10]: gene2 = Dseqrecord(\"cctCCCatgaaataaGGGcaa\", name=\"gene2\")\n",
+    "gene2.add_feature(6,15)\n",
+    "gene2.figure()\n",
+    "Out[10]: Dseqrecord(-21)\n",
+    "cctCCCatgaaataaGGGcaa\n",
+    "ggaGGGtactttattCCCgtt\n",
+    "In [11]: from pydna.assembly import Assembly\n",
+    "from Bio. Restriction import SmaI # cuts CCCGGG\n",
+    "linear_vector_sma = vector.linearize(SmaI)\n",
+    "asm = Assembly ((linear_vector_sma, gene2), limit=6)\n",
+    "candidate, *rest = asm.assemble_circular()\n",
+    "In [12]: candidate.figure()\n",
+    "Out[12]: - vect_lin| 6\n",
+    "\\/\n",
+    "/\\\n",
+    "6 gene2 6\n",
+    "\\/\n",
+    "/\\\n",
+    "6-\n",
+    "In [13]: candidate = candidate.synced(vector, limit=10)\n",
+    "In [14]: Dseqrecord(candidate).figure()\n",
+    "Out[14]: Dseqrecord(o59)\n",
+    "aatgtttttccctCCCatgaaataaGGGcaaaatAGATCTtgctatgcatcatcgatct\n",
+    "ttacaaaaagggaGGGtactttattCCCgttttaTCTAGAacgatacgtagtagctaga\n",
+    "In [16]: from pydna.gel import gel\n",
+    "from pydna.ladders import GeneRuler_1kb_plus\n",
+    "In [17]: band = Dseqrecord(\"GATC\"*500)\n",
+    "In [18]: gel([GeneRuler_1kb_plus,\n",
+    "[band, ]])\n",
+    "Out[18]: 20000 -\n",
+    "10000 -\n",
+    "7000\n",
+    "5000\n",
+    "4000\n",
+    "-\n",
+    "3000\n",
+    "-\n",
+    "2000\n",
+    "1500\n",
+    "1000\n",
+    "700\n",
+    "500\n",
+    "400\n",
+    "300\n",
+    "200\n",
+    "75"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/pydna/amplify.py b/src/pydna/amplify.py
@@ -17,7 +17,7 @@
 from pydna.utils import flatten as _flatten
 
 # from pydna.utils import memorize as _memorize
-from pydna.utils import rc as _rc
+from pydna.utils import rc as _rc, shift_feature as _shift_feature
 from pydna.amplicon import Amplicon as _Amplicon
 from pydna.primer import Primer as _Primer
 from pydna.seqrecord import SeqRecord as _SeqRecord
@@ -351,6 +351,8 @@ def products(self):
                     feats = self.template[
                         fp.position - fp._fp : rp.position + rp._fp
                     ].features  # Save features covered by primers
+                    shift_amount = len(fp.tail)
+                    feats = [_shift_feature(f, shift_amount, None) for f in feats]
                     tpl = self.template
                 else:
                     continue

diff --git a/src/pydna/utils.py b/src/pydna/utils.py
@@ -80,6 +80,8 @@ def shift_location(original_location, shift, lim):
     """docstring."""
     newparts = []
     strand = original_location.strand
+    if lim is None:
+        lim = _sys.maxsize
 
     for part in original_location.parts:
         new_start = (part.start + shift) % lim