{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Project 3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This week I continued last week's analysis, looking more closely at the phylogenetic trees generated there.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: biopython in c:\\users\\samue\\anaconda3\\lib\\site-packages (1.76)\n", "Requirement already satisfied: numpy in c:\\users\\samue\\anaconda3\\lib\\site-packages (from biopython) (1.18.1)\n" ] } ], "source": [ "# %pip (rather than !pip) installs into the environment of the running kernel; the version is pinned to the one this analysis was run with.\n", "%pip install biopython==1.76\n", "import numpy as np\n", "import pandas as pd\n", "from Bio import Phylo\n", "from Bio.Phylo.TreeConstruction import DistanceMatrix, DistanceTreeConstructor" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Loading the given data from the position table" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "position_table = pd.read_csv('../../data/position_table.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | seqid | \n", "S_1_1 | \n", "S_1_2 | \n", "S_1_3 | \n", "S_2_1 | \n", "S_2_2 | \n", "S_2_3 | \n", "S_3_1 | \n", "S_3_2 | \n", "S_3_3 | \n", "... | \n", "S_1270_3 | \n", "S_1271_1 | \n", "S_1271_2 | \n", "S_1271_3 | \n", "S_1272_1 | \n", "S_1272_2 | \n", "S_1272_3 | \n", "S_1273_1 | \n", "S_1273_2 | \n", "S_1273_3 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "... | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "
unique | \n", "677 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "... | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "
top | \n", "MT263410.1 | \n", "A | \n", "T | \n", "G | \n", "T | \n", "T | \n", "T | \n", "G | \n", "T | \n", "T | \n", "... | \n", "A | \n", "C | \n", "A | \n", "T | \n", "T | \n", "A | \n", "C | \n", "A | \n", "C | \n", "A | \n", "
freq | \n", "1 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "... | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "677 | \n", "
4 rows × 3820 columns
\n", "\n", " | seqid | \n", "S_1_1 | \n", "S_1_2 | \n", "S_1_3 | \n", "S_2_1 | \n", "S_2_2 | \n", "S_2_3 | \n", "S_3_1 | \n", "S_3_2 | \n", "S_3_3 | \n", "... | \n", "S_1270_3 | \n", "S_1271_1 | \n", "S_1271_2 | \n", "S_1271_3 | \n", "S_1272_1 | \n", "S_1272_2 | \n", "S_1272_3 | \n", "S_1273_1 | \n", "S_1273_2 | \n", "S_1273_3 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "MT007544.1 | \n", "A | \n", "T | \n", "G | \n", "T | \n", "T | \n", "T | \n", "G | \n", "T | \n", "T | \n", "... | \n", "A | \n", "C | \n", "A | \n", "T | \n", "T | \n", "A | \n", "C | \n", "A | \n", "C | \n", "A | \n", "
1 | \n", "MT019529.1 | \n", "A | \n", "T | \n", "G | \n", "T | \n", "T | \n", "T | \n", "G | \n", "T | \n", "T | \n", "... | \n", "A | \n", "C | \n", "A | \n", "T | \n", "T | \n", "A | \n", "C | \n", "A | \n", "C | \n", "A | \n", "
2 | \n", "MT019530.1 | \n", "A | \n", "T | \n", "G | \n", "T | \n", "T | \n", "T | \n", "G | \n", "T | \n", "T | \n", "... | \n", "A | \n", "C | \n", "A | \n", "T | \n", "T | \n", "A | \n", "C | \n", "A | \n", "C | \n", "A | \n", "
3 | \n", "MT019531.1 | \n", "A | \n", "T | \n", "G | \n", "T | \n", "T | \n", "T | \n", "G | \n", "T | \n", "T | \n", "... | \n", "A | \n", "C | \n", "A | \n", "T | \n", "T | \n", "A | \n", "C | \n", "A | \n", "C | \n", "A | \n", "
4 | \n", "MT019532.1 | \n", "A | \n", "T | \n", "G | \n", "T | \n", "T | \n", "T | \n", "G | \n", "T | \n", "T | \n", "... | \n", "A | \n", "C | \n", "A | \n", "T | \n", "T | \n", "A | \n", "C | \n", "A | \n", "C | \n", "A | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
672 | \n", "MT334544.1 | \n", "A | \n", "T | \n", "G | \n", "T | \n", "T | \n", "T | \n", "G | \n", "T | \n", "T | \n", "... | \n", "A | \n", "C | \n", "A | \n", "T | \n", "T | \n", "A | \n", "C | \n", "A | \n", "C | \n", "A | \n", "
673 | \n", "MT334546.1 | \n", "A | \n", "T | \n", "G | \n", "T | \n", "T | \n", "T | \n", "G | \n", "T | \n", "T | \n", "... | \n", "A | \n", "C | \n", "A | \n", "T | \n", "T | \n", "A | \n", "C | \n", "A | \n", "C | \n", "A | \n", "
674 | \n", "MT334547.1 | \n", "A | \n", "T | \n", "G | \n", "T | \n", "T | \n", "T | \n", "G | \n", "T | \n", "T | \n", "... | \n", "A | \n", "C | \n", "A | \n", "T | \n", "T | \n", "A | \n", "C | \n", "A | \n", "C | \n", "A | \n", "
675 | \n", "MT334557.1 | \n", "A | \n", "T | \n", "G | \n", "T | \n", "T | \n", "T | \n", "G | \n", "T | \n", "T | \n", "... | \n", "A | \n", "C | \n", "A | \n", "T | \n", "T | \n", "A | \n", "C | \n", "A | \n", "C | \n", "A | \n", "
676 | \n", "MT334561.1 | \n", "A | \n", "T | \n", "G | \n", "T | \n", "T | \n", "T | \n", "G | \n", "T | \n", "T | \n", "... | \n", "A | \n", "C | \n", "A | \n", "T | \n", "T | \n", "A | \n", "C | \n", "A | \n", "C | \n", "A | \n", "
677 rows × 3820 columns
\n", "