{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile requirements.txt\n", "python-dotenv==0.20.0\n", "mysql-connector-python==8.0.30\n", "ipython-sql==0.4.1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install -r requirements.txt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os\n", "import mysql.connector\n", "import pandas as pd\n", "import sqlalchemy" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile .env\n", "HOST1=\n", "USER1=\n", "PASSWORD1=\n", "DATABASE1=" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from dotenv import load_dotenv\n", "load_dotenv()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "USERNAME = os.getenv('USER1')\n", "PASSWORD = os.getenv('PASSWORD1')\n", "HOST = os.getenv('HOST1')\n", "DATABASE = os.getenv('DATABASE1')\n", "print(USERNAME , HOST, DATABASE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "TABLE = \"us_housing_units\"" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "conn_str = 'mysql+mysqlconnector://{0}:{1}@{2}/{3}'.format(USERNAME, PASSWORD, HOST, DATABASE)\n", "conn = sqlalchemy.create_engine(conn_str)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "%config SqlMagic.autopandas=True\n", "%config SqlMagic.displaycon=False\n", "%config SqlMagic.feedback=False\n", "%reload_ext sql\n", "%sql {conn_str}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!aws s3 ls s3://jan16-data/projects/sql/" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!mkdir -p data\n", "!aws s3 cp s3://jan16-data/projects/sql/{TABLE}.csv data/{TABLE}.csv" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearmonthmonth_namesouthwestmidwestnortheast
01968.01.0January35.617.022.612.9
11968.02.0February31.518.623.39.7
21968.03.0March42.517.424.410.7
31968.04.0April42.920.627.012.0
41968.05.0May46.220.025.120.0
........................
5542014.03.0March37.715.89.84.4
5552014.04.0April34.215.09.25.5
5562014.05.0May33.717.512.210.0
5572014.06.0June36.215.213.17.0
5582014.07.0July35.815.912.95.9
\n", "

559 rows × 7 columns

\n", "
" ], "text/plain": [ " year month month_name south west midwest northeast\n", "0 1968.0 1.0 January 35.6 17.0 22.6 12.9\n", "1 1968.0 2.0 February 31.5 18.6 23.3 9.7\n", "2 1968.0 3.0 March 42.5 17.4 24.4 10.7\n", "3 1968.0 4.0 April 42.9 20.6 27.0 12.0\n", "4 1968.0 5.0 May 46.2 20.0 25.1 20.0\n", ".. ... ... ... ... ... ... ...\n", "554 2014.0 3.0 March 37.7 15.8 9.8 4.4\n", "555 2014.0 4.0 April 34.2 15.0 9.2 5.5\n", "556 2014.0 5.0 May 33.7 17.5 12.2 10.0\n", "557 2014.0 6.0 June 36.2 15.2 13.1 7.0\n", "558 2014.0 7.0 July 35.8 15.9 12.9 5.9\n", "\n", "[559 rows x 7 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(f\"data/{TABLE}.csv\")\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df[\"end_time\"] = pd.to_datetime(df[\"end_time\"])\n", "df.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.to_csv(f\"data/{TABLE}.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "CREATE TABLE us_housing_units (\n", "\tyear FLOAT(53), \n", "\tmonth FLOAT(53), \n", "\tmonth_name TEXT, \n", "\tsouth FLOAT(53), \n", "\twest FLOAT(53), \n", "\tmidwest FLOAT(53), \n", "\tnortheast FLOAT(53)\n", ")\n", "\n", "\n" ] } ], "source": [ "print(pd.io.sql.get_schema(df,\n", " name=TABLE,\n", " con=conn))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: []\n", "Index: []" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%sql\n", "\n", "CREATE TABLE IF NOT EXISTS us_housing_units (\n", "\tyear FLOAT(53), \n", "\tmonth FLOAT(53), \n", "\tmonth_name TEXT, \n", "\tsouth FLOAT(53), \n", "\twest FLOAT(53), \n", "\tmidwest FLOAT(53), \n", "\tnortheast FLOAT(53)\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%sql\n", "truncate table {TABLE}" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Mac/Linux" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!echo mysqlimport --local \\\n", " --compress \\\n", " --user={USERNAME} \\\n", " --password \\\n", " --host={HOST} \\\n", " --ignore-lines=1 \\\n", " --fields-terminated-by=\\',\\' {DATABASE} data/{TABLE}.csv " ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Windows" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!echo mysqlimport --local \\\n", " --compress \\\n", " --user={USERNAME} \\\n", " --password \\\n", " --host={HOST} \\\n", " --ignore-lines=1 \\\n", " --fields-terminated-by=',' {DATABASE} data\\{TABLE}.csv " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%sql\n", "select count(*) from {TABLE}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%sql\n", "select * from {TABLE} limit 10" ] } ], "metadata": { "kernelspec": { "display_name": "env-spacy", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "343191058819caea96d5cde1bd3b1a75b4807623ce2cda0e1c8499e39ac847e3" } } }, "nbformat": 4, "nbformat_minor": 2 }