Guest User

Untitled

a guest
Jan 17th, 2019
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.72 KB | None | 0 0
  1. {
  2. "nbformat_minor": 1,
  3. "cells": [
  4. {
  5. "execution_count": 25,
  6. "cell_type": "code",
  7. "metadata": {},
  8. "outputs": [
  9. {
  10. "execution_count": 25,
  11. "metadata": {},
  12. "data": {
  13. "text/plain": "{'asperity': 0.32,\n 'maxtemp': 35,\n 'maxvibration': 12,\n 'mintemp': 35,\n 'partno': 100}"
  14. },
  15. "output_type": "execute_result"
  16. }
  17. ],
  18. "source": "dp = {'partno': 100, 'maxtemp': 35, 'mintemp': 35, 'maxvibration': 12, 'asperity': 0.32}\ndp"
  19. },
  20. {
  21. "execution_count": 26,
  22. "cell_type": "code",
  23. "metadata": {},
  24. "outputs": [],
  25. "source": "dp1 = {'partno': 100, 'maxtemp': 35, 'mintemp': 35, 'maxvibration': 12, 'asperity': 0.32}\ndp2 = {'partno': 101, 'maxtemp': 46, 'mintemp': 35, 'maxvibration': 21, 'asperity': 0.34}\ndp3 = {'partno': 130, 'maxtemp': 56, 'mintemp': 46, 'maxvibration': 3412, 'asperity': 12.42}\ndp4 = {'partno': 131, 'maxtemp': 58, 'mintemp': 48, 'maxvibration': 3542, 'asperity': 13.43}"
  26. },
  27. {
  28. "execution_count": 30,
  29. "cell_type": "code",
  30. "metadata": {},
  31. "outputs": [],
  32. "source": "### Hardcoding a rule\n\ndef predict(dp):\n    if dp['maxvibration'] > 100:\n        return 13\n    else:\n        return 0.33\n"
  33. },
  34. {
  35. "execution_count": 31,
  36. "cell_type": "code",
  37. "metadata": {},
  38. "outputs": [
  39. {
  40. "execution_count": 31,
  41. "metadata": {},
  42. "data": {
  43. "text/plain": "0.33"
  44. },
  45. "output_type": "execute_result"
  46. }
  47. ],
  48. "source": "predict(dp1)"
  49. },
  50. {
  51. "execution_count": 32,
  52. "cell_type": "code",
  53. "metadata": {},
  54. "outputs": [
  55. {
  56. "execution_count": 32,
  57. "metadata": {},
  58. "data": {
  59. "text/plain": "0.33"
  60. },
  61. "output_type": "execute_result"
  62. }
  63. ],
  64. "source": "predict(dp2)"
  65. },
  66. {
  67. "execution_count": 33,
  68. "cell_type": "code",
  69. "metadata": {},
  70. "outputs": [
  71. {
  72. "execution_count": 33,
  73. "metadata": {},
  74. "data": {
  75. "text/plain": "13"
  76. },
  77. "output_type": "execute_result"
  78. }
  79. ],
  80. "source": "predict(dp3)"
  81. },
  82. {
  83. "execution_count": 34,
  84. "cell_type": "code",
  85. "metadata": {},
  86. "outputs": [
  87. {
  88. "execution_count": 34,
  89. "metadata": {},
  90. "data": {
  91. "text/plain": "13"
  92. },
  93. "output_type": "execute_result"
  94. }
  95. ],
  96. "source": "predict(dp4)"
  97. },
  98. {
  99. "execution_count": 45,
  100. "cell_type": "code",
  101. "metadata": {},
  102. "outputs": [],
  103. "source": "### Applying linear regression model\nw1 = 0.30\nw2 = 0\nw3 = 0\nw4 = 13/3412.0\n\ndef mlpredict(dp):\n    return w1+w2*dp['maxtemp']+w3*dp['mintemp']+w4*dp['maxvibration']\n"
  104. },
  105. {
  106. "execution_count": 50,
  107. "cell_type": "code",
  108. "metadata": {},
  109. "outputs": [
  110. {
  111. "execution_count": 50,
  112. "metadata": {},
  113. "data": {
  114. "text/plain": "13.795310668229778"
  115. },
  116. "output_type": "execute_result"
  117. }
  118. ],
  119. "source": "mlpredict(dp4)"
  120. },
  121. {
  122. "execution_count": null,
  123. "cell_type": "code",
  124. "metadata": {},
  125. "outputs": [],
  126. "source": "# The code was removed by Watson Studio for sharing."
  127. },
  128. {
  129. "execution_count": 2,
  130. "cell_type": "code",
  131. "metadata": {},
  132. "outputs": [],
  133. "source": "spark = SparkSession\\\n .builder\\\n .appName(\"Cloudant Spark SQL Example in Python using temp tables\")\\\n .config(\"cloudant.host\",credentials_1['custom_url'].split('@')[1])\\\n .config(\"cloudant.username\", credentials_1['username'])\\\n .config(\"cloudant.password\",credentials_1['password'])\\\n .config(\"jsonstore.rdd.partitions\", 1)\\\n .getOrCreate()"
  134. },
  135. {
  136. "execution_count": 3,
  137. "cell_type": "code",
  138. "metadata": {},
  139. "outputs": [
  140. {
  141. "output_type": "stream",
  142. "name": "stdout",
  143. "text": "+-----+--------+----+----+----+--------------------+--------------------+\n|CLASS|SENSORID| X| Y| Z| _id| _rev|\n+-----+--------+----+----+----+--------------------+--------------------+\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota|4.33|4.33|4.33|0185f877f56850414...|1-7c4dcc1c51393ac...|\n| 0| asnota|0.35|0.35|0.35|0185f877f56850414...|1-3fefdce3a8c95e8...|\n| 0| asnota| 0.4| 0.4| 0.4|0185f877f56850414...|1-23ff5874b4b3eaf...|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n+-----+--------+----+----+----+--------------------+--------------------+\nonly showing top 20 rows\n\n"
  144. }
  145. ],
  146. "source": "df=spark.read.load('shake', \"com.cloudant.spark\")\n\ndf.createOrReplaceTempView(\"df\")\nspark.sql(\"SELECT * from df\").show()"
  147. },
  148. {
  149. "execution_count": 7,
  150. "cell_type": "code",
  151. "metadata": {},
  152. "outputs": [
  153. {
  154. "output_type": "stream",
  155. "name": "stdout",
  156. "text": "+-----------------+-----+\n| label|class|\n+-----------------+-----+\n|48.10172969862932| 0|\n|137.8929019202952| 1|\n+-----------------+-----+\n\n"
  157. }
  158. ],
  159. "source": "### Create a feature-engineered column \"label\", computing the energy of the device\n#(in a real-world scenario the physical mass of the device should be taken into consideration)\n\ndf_energy = spark.sql('''\n\nSELECT sqrt(SUM(x*x)+SUM(y*y)+SUM(z*z)) AS label, class FROM df GROUP BY class\n\n''')\ndf_energy.show()\n\n### Create a new dataframe and join the column to the original data\n#in order to perform SQL queries later on using whole data in one df.\n\ndf_energy.createOrReplaceTempView('df_energy')\n"
  160. },
  161. {
  162. "execution_count": 8,
  163. "cell_type": "code",
  164. "metadata": {},
  165. "outputs": [
  166. {
  167. "output_type": "stream",
  168. "name": "stdout",
  169. "text": "+-----+--------+----+----+----+--------------------+--------------------+-----------------+-----+\n|CLASS|SENSORID| X| Y| Z| _id| _rev| label|CLASS|\n+-----+--------+----+----+----+--------------------+--------------------+-----------------+-----+\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota|4.33|4.33|4.33|0185f877f56850414...|1-7c4dcc1c51393ac...|48.10172969862932| 0|\n| 0| asnota|0.35|0.35|0.35|0185f877f56850414...|1-3fefdce3a8c95e8...|48.10172969862932| 0|\n| 0| asnota| 0.4| 0.4| 0.4|0185f877f56850414...|1-23ff5874b4b3eaf...|48.10172969862932| 0|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|48.10172969862932| 0|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| 
asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n+-----+--------+----+----+----+--------------------+--------------------+-----------------+-----+\nonly showing top 20 rows\n\n"
  170. }
  171. ],
  172. "source": "df_join = spark.sql('''\n\nSELECT * FROM df INNER JOIN df_energy ON df.class=df_energy.class\n\n''')\n\ndf_join.show()"
  173. },
  174. {
  175. "execution_count": 20,
  176. "cell_type": "code",
  177. "metadata": {},
  178. "outputs": [],
  179. "source": "from pyspark.ml.feature import VectorAssembler\nfrom pyspark.ml.feature import Normalizer\n\nvectorAssembler = VectorAssembler(inputCols=[\"X\", \"Y\", \"Z\"],\n outputCol=\"features\")\nnormalizer = Normalizer(inputCol=\"features\", outputCol=\"features_norm\", p=1.0)\n"
  180. },
  181. {
  182. "execution_count": 21,
  183. "cell_type": "code",
  184. "metadata": {},
  185. "outputs": [],
  186. "source": "### Create linear regression model\nfrom pyspark.ml.regression import LinearRegression\n\nlr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)\n"
  187. },
  188. {
  189. "execution_count": 15,
  190. "cell_type": "code",
  191. "metadata": {},
  192. "outputs": [],
  193. "source": "from pyspark.ml import Pipeline\n\npipeline = Pipeline(stages=[vectorAssembler, normalizer, lr])"
  194. },
  195. {
  196. "execution_count": null,
  197. "cell_type": "code",
  198. "metadata": {},
  199. "outputs": [],
  200. "source": ""
  201. }
  202. ],
  203. "metadata": {
  204. "kernelspec": {
  205. "display_name": "Python 3.5 with Spark 2.1",
  206. "name": "python3-spark21",
  207. "language": "python"
  208. },
  209. "language_info": {
  210. "mimetype": "text/x-python",
  211. "nbconvert_exporter": "python",
  212. "version": "3.5.4",
  213. "name": "python",
  214. "file_extension": ".py",
  215. "pygments_lexer": "ipython3",
  216. "codemirror_mode": {
  217. "version": 3,
  218. "name": "ipython"
  219. }
  220. }
  221. },
  222. "nbformat": 4
  223. }
Add Comment
Please, Sign In to add comment