Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "nbformat_minor": 1,
- "cells": [
- {
- "execution_count": 25,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "execution_count": 25,
- "metadata": {},
- "data": {
- "text/plain": "{'asperity': 0.32,\n 'maxtemp': 35,\n 'maxvibration': 12,\n 'mintemp': 35,\n 'partno': 100}"
- },
- "output_type": "execute_result"
- }
- ],
- "source": "dp = {'partno': 100, 'maxtemp': 35, 'mintemp': 35, 'maxvibration': 12, 'asperity': 0.32}\ndp"
- },
- {
- "execution_count": 26,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": "dp1 = {'partno': 100, 'maxtemp': 35, 'mintemp': 35, 'maxvibration': 12, 'asperity': 0.32}\ndp2 = {'partno': 101, 'maxtemp': 46, 'mintemp': 35, 'maxvibration': 21, 'asperity': 0.34}\ndp3 = {'partno': 130, 'maxtemp': 56, 'mintemp': 46, 'maxvibration': 3412, 'asperity': 12.42}\ndp4 = {'partno': 131, 'maxtemp': 58, 'mintemp': 48, 'maxvibration': 3542, 'asperity': 13.43}"
- },
- {
- "execution_count": 30,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": "### Hardcoding a rule \n\ndef predict(dp):\n if dp['maxvibration']> 100:\n return 13\n else:\n return 0.33\n "
- },
- {
- "execution_count": 31,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "execution_count": 31,
- "metadata": {},
- "data": {
- "text/plain": "0.33"
- },
- "output_type": "execute_result"
- }
- ],
- "source": "predict(dp1)"
- },
- {
- "execution_count": 32,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "execution_count": 32,
- "metadata": {},
- "data": {
- "text/plain": "0.33"
- },
- "output_type": "execute_result"
- }
- ],
- "source": "predict(dp2)"
- },
- {
- "execution_count": 33,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "execution_count": 33,
- "metadata": {},
- "data": {
- "text/plain": "13"
- },
- "output_type": "execute_result"
- }
- ],
- "source": "predict(dp3)"
- },
- {
- "execution_count": 34,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "execution_count": 34,
- "metadata": {},
- "data": {
- "text/plain": "13"
- },
- "output_type": "execute_result"
- }
- ],
- "source": "predict(dp4)"
- },
- {
- "execution_count": 45,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": "###Applying Linear regression model\nw1 = 0.30\nw2 = 0\nw3 = 0\nw4 = 13/3412.0\n\ndef mlpredict(dp):\n return w1+w2*dp['maxtemp']+w3*dp['mintemp']+w4*dp['maxvibration']\n"
- },
- {
- "execution_count": 50,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "execution_count": 50,
- "metadata": {},
- "data": {
- "text/plain": "13.795310668229778"
- },
- "output_type": "execute_result"
- }
- ],
- "source": "mlpredict(dp4)"
- },
- {
- "execution_count": null,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": "# The code was removed by Watson Studio for sharing."
- },
- {
- "execution_count": 2,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": "spark = SparkSession\\\n .builder\\\n .appName(\"Cloudant Spark SQL Example in Python using temp tables\")\\\n .config(\"cloudant.host\",credentials_1['custom_url'].split('@')[1])\\\n .config(\"cloudant.username\", credentials_1['username'])\\\n .config(\"cloudant.password\",credentials_1['password'])\\\n .config(\"jsonstore.rdd.partitions\", 1)\\\n .getOrCreate()"
- },
- {
- "execution_count": 3,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": "+-----+--------+----+----+----+--------------------+--------------------+\n|CLASS|SENSORID| X| Y| Z| _id| _rev|\n+-----+--------+----+----+----+--------------------+--------------------+\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota|4.33|4.33|4.33|0185f877f56850414...|1-7c4dcc1c51393ac...|\n| 0| asnota|0.35|0.35|0.35|0185f877f56850414...|1-3fefdce3a8c95e8...|\n| 0| asnota| 0.4| 0.4| 0.4|0185f877f56850414...|1-23ff5874b4b3eaf...|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n+-----+--------+----+----+----+--------------------+--------------------+\nonly showing top 20 rows\n\n"
- }
- ],
- "source": "df=spark.read.load('shake', \"com.cloudant.spark\")\n\ndf.createOrReplaceTempView(\"df\")\nspark.sql(\"SELECT * from df\").show()"
- },
- {
- "execution_count": 7,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": "+-----------------+-----+\n| label|class|\n+-----------------+-----+\n|48.10172969862932| 0|\n|137.8929019202952| 1|\n+-----------------+-----+\n\n"
- }
- ],
- "source": "### Create a feature-engineered column \"label\", computing the energy of the device\n#(in a real-world scenario the physical mass of the device should be taken into consideration)\n\ndf_energy = spark.sql('''\n\nSELECT sqrt(SUM(x*x)+SUM(y*y)+SUM(z*z)) AS label, class FROM df group BY class\n\n''')\ndf_energy.show()\n\n###Register the new dataframe as a temp view so the column can be joined to the original data\n#in order to perform SQL queries later on using the whole data in one df. \n\ndf_energy.createOrReplaceTempView('df_energy')\n"
- },
- {
- "execution_count": 8,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": "+-----+--------+----+----+----+--------------------+--------------------+-----------------+-----+\n|CLASS|SENSORID| X| Y| Z| _id| _rev| label|CLASS|\n+-----+--------+----+----+----+--------------------+--------------------+-----------------+-----+\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota|4.33|4.33|4.33|0185f877f56850414...|1-7c4dcc1c51393ac...|48.10172969862932| 0|\n| 0| asnota|0.35|0.35|0.35|0185f877f56850414...|1-3fefdce3a8c95e8...|48.10172969862932| 0|\n| 0| asnota| 0.4| 0.4| 0.4|0185f877f56850414...|1-23ff5874b4b3eaf...|48.10172969862932| 0|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|48.10172969862932| 0|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| 
asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n+-----+--------+----+----+----+--------------------+--------------------+-----------------+-----+\nonly showing top 20 rows\n\n"
- }
- ],
- "source": "df_join = spark.sql('''\n\nSELECT * FROM df INNER JOIN df_energy ON df.class=df_energy.class\n\n''')\n\ndf_join.show()"
- },
- {
- "execution_count": 20,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": "from pyspark.ml.feature import VectorAssembler\nfrom pyspark.ml.feature import Normalizer\n\nvectorAssembler = VectorAssembler(inputCols=[\"X\", \"Y\", \"Z\"],\n outputCol=\"features\")\nnormalizer = Normalizer(inputCol=\"features\", outputCol=\"features_norm\", p=1.0)\n"
- },
- {
- "execution_count": 21,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": "### Create linear regression model\nfrom pyspark.ml.regression import LinearRegression\n\nlr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)\n"
- },
- {
- "execution_count": 15,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": "from pyspark.ml import Pipeline\n\npipeline = Pipeline(stages=[vectorAssembler, normalizer, lr])"
- },
- {
- "execution_count": null,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": ""
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3.5 with Spark 2.1",
- "name": "python3-spark21",
- "language": "python"
- },
- "language_info": {
- "mimetype": "text/x-python",
- "nbconvert_exporter": "python",
- "version": "3.5.4",
- "name": "python",
- "file_extension": ".py",
- "pygments_lexer": "ipython3",
- "codemirror_mode": {
- "version": 3,
- "name": "ipython"
- }
- }
- },
- "nbformat": 4
- }
Add Comment
Please, Sign In to add comment