Guest User

Untitled

a guest
Jan 17th, 2019
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.72 KB | None | 0 0
  1. {
  2. "nbformat_minor": 1,
  3. "cells": [
  4. {
  5. "execution_count": 25,
  6. "cell_type": "code",
  7. "metadata": {},
  8. "outputs": [
  9. {
  10. "execution_count": 25,
  11. "metadata": {},
  12. "data": {
  13. "text/plain": "{'asperity': 0.32,\n 'maxtemp': 35,\n 'maxvibration': 12,\n 'mintemp': 35,\n 'partno': 100}"
  14. },
  15. "output_type": "execute_result"
  16. }
  17. ],
  18. "source": "dp = {'partno': 100, 'maxtemp': 35, 'mintemp': 35, 'maxvibration': 12, 'asperity': 0.32}\ndp"
  19. },
  20. {
  21. "execution_count": 26,
  22. "cell_type": "code",
  23. "metadata": {},
  24. "outputs": [],
  25. "source": "dp1 = {'partno': 100, 'maxtemp': 35, 'mintemp': 35, 'maxvibration': 12, 'asperity': 0.32}\ndp2 = {'partno': 101, 'maxtemp': 46, 'mintemp': 35, 'maxvibration': 21, 'asperity': 0.34}\ndp3 = {'partno': 130, 'maxtemp': 56, 'mintemp': 46, 'maxvibration': 3412, 'asperity': 12.42}\ndp4 = {'partno': 131, 'maxtemp': 58, 'mintemp': 48, 'maxvibration': 3542, 'asperity': 13.43}"
  26. },
  27. {
  28. "execution_count": 30,
  29. "cell_type": "code",
  30. "metadata": {},
  31. "outputs": [],
  32. "source": "### Hardcoding a rule\n\ndef predict(dp):\n    if dp['maxvibration'] > 100:\n        return 13\n    else:\n        return 0.33\n"
  33. },
  34. {
  35. "execution_count": 31,
  36. "cell_type": "code",
  37. "metadata": {},
  38. "outputs": [
  39. {
  40. "execution_count": 31,
  41. "metadata": {},
  42. "data": {
  43. "text/plain": "0.33"
  44. },
  45. "output_type": "execute_result"
  46. }
  47. ],
  48. "source": "predict(dp1)"
  49. },
  50. {
  51. "execution_count": 32,
  52. "cell_type": "code",
  53. "metadata": {},
  54. "outputs": [
  55. {
  56. "execution_count": 32,
  57. "metadata": {},
  58. "data": {
  59. "text/plain": "0.33"
  60. },
  61. "output_type": "execute_result"
  62. }
  63. ],
  64. "source": "predict(dp2)"
  65. },
  66. {
  67. "execution_count": 33,
  68. "cell_type": "code",
  69. "metadata": {},
  70. "outputs": [
  71. {
  72. "execution_count": 33,
  73. "metadata": {},
  74. "data": {
  75. "text/plain": "13"
  76. },
  77. "output_type": "execute_result"
  78. }
  79. ],
  80. "source": "predict(dp3)"
  81. },
  82. {
  83. "execution_count": 34,
  84. "cell_type": "code",
  85. "metadata": {},
  86. "outputs": [
  87. {
  88. "execution_count": 34,
  89. "metadata": {},
  90. "data": {
  91. "text/plain": "13"
  92. },
  93. "output_type": "execute_result"
  94. }
  95. ],
  96. "source": "predict(dp4)"
  97. },
  98. {
  99. "execution_count": 45,
  100. "cell_type": "code",
  101. "metadata": {},
  102. "outputs": [],
  103. "source": "### Applying linear regression model\nw1 = 0.30\nw2 = 0\nw3 = 0\nw4 = 13/3412.0\n\ndef mlpredict(dp):\n    return w1+w2*dp['maxtemp']+w3*dp['mintemp']+w4*dp['maxvibration']\n"
  104. },
  105. {
  106. "execution_count": 50,
  107. "cell_type": "code",
  108. "metadata": {},
  109. "outputs": [
  110. {
  111. "execution_count": 50,
  112. "metadata": {},
  113. "data": {
  114. "text/plain": "13.795310668229778"
  115. },
  116. "output_type": "execute_result"
  117. }
  118. ],
  119. "source": "mlpredict(dp4)"
  120. },
  121. {
  122. "execution_count": null,
  123. "cell_type": "code",
  124. "metadata": {},
  125. "outputs": [],
  126. "source": "# The code was removed by Watson Studio for sharing."
  127. },
  128. {
  129. "execution_count": 2,
  130. "cell_type": "code",
  131. "metadata": {},
  132. "outputs": [],
  133. "source": "spark = SparkSession\\\n .builder\\\n .appName(\"Cloudant Spark SQL Example in Python using temp tables\")\\\n .config(\"cloudant.host\",credentials_1['custom_url'].split('@')[1])\\\n .config(\"cloudant.username\", credentials_1['username'])\\\n .config(\"cloudant.password\",credentials_1['password'])\\\n .config(\"jsonstore.rdd.partitions\", 1)\\\n .getOrCreate()"
  134. },
  135. {
  136. "execution_count": 3,
  137. "cell_type": "code",
  138. "metadata": {},
  139. "outputs": [
  140. {
  141. "output_type": "stream",
  142. "name": "stdout",
  143. "text": "+-----+--------+----+----+----+--------------------+--------------------+\n|CLASS|SENSORID| X| Y| Z| _id| _rev|\n+-----+--------+----+----+----+--------------------+--------------------+\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|\n| 0| asnota|4.33|4.33|4.33|0185f877f56850414...|1-7c4dcc1c51393ac...|\n| 0| asnota|0.35|0.35|0.35|0185f877f56850414...|1-3fefdce3a8c95e8...|\n| 0| asnota| 0.4| 0.4| 0.4|0185f877f56850414...|1-23ff5874b4b3eaf...|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|\n+-----+--------+----+----+----+--------------------+--------------------+\nonly showing top 20 rows\n\n"
  144. }
  145. ],
  146. "source": "df=spark.read.load('shake', \"com.cloudant.spark\")\n\ndf.createOrReplaceTempView(\"df\")\nspark.sql(\"SELECT * from df\").show()"
  147. },
  148. {
  149. "execution_count": 7,
  150. "cell_type": "code",
  151. "metadata": {},
  152. "outputs": [
  153. {
  154. "output_type": "stream",
  155. "name": "stdout",
  156. "text": "+-----------------+-----+\n| label|class|\n+-----------------+-----+\n|48.10172969862932| 0|\n|137.8929019202952| 1|\n+-----------------+-----+\n\n"
  157. }
  158. ],
  159. "source": "### Create a feature-engineered column \"label\", computing the energy of the device\n#(in a real-world scenario the physical mass of the device should be taken into consideration)\n\ndf_energy = spark.sql('''\n\nSELECT sqrt(SUM(x*x)+SUM(y*y)+SUM(z*z)) AS label, class FROM df GROUP BY class\n\n''')\ndf_energy.show()\n\n### Create a new dataframe and join the column to the original data\n#in order to perform SQL queries later on using whole data in one df.\n\ndf_energy.createOrReplaceTempView('df_energy')\n"
  160. },
  161. {
  162. "execution_count": 8,
  163. "cell_type": "code",
  164. "metadata": {},
  165. "outputs": [
  166. {
  167. "output_type": "stream",
  168. "name": "stdout",
  169. "text": "+-----+--------+----+----+----+--------------------+--------------------+-----------------+-----+\n|CLASS|SENSORID| X| Y| Z| _id| _rev| label|CLASS|\n+-----+--------+----+----+----+--------------------+--------------------+-----------------+-----+\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota| 0.5| 0.5| 0.5|0185f877f56850414...|1-fd3c646751f199e...|48.10172969862932| 0|\n| 0| asnota|4.33|4.33|4.33|0185f877f56850414...|1-7c4dcc1c51393ac...|48.10172969862932| 0|\n| 0| asnota|0.35|0.35|0.35|0185f877f56850414...|1-3fefdce3a8c95e8...|48.10172969862932| 0|\n| 0| asnota| 0.4| 0.4| 0.4|0185f877f56850414...|1-23ff5874b4b3eaf...|48.10172969862932| 0|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|48.10172969862932| 0|\n| 0| asnota|0.43|0.43|0.43|0185f877f56850414...|1-cafe693d29323a8...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| 
asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n| 0| asnota|0.49|0.49|0.49|0185f877f56850414...|1-bef4c399eda609f...|48.10172969862932| 0|\n+-----+--------+----+----+----+--------------------+--------------------+-----------------+-----+\nonly showing top 20 rows\n\n"
  170. }
  171. ],
  172. "source": "df_join = spark.sql('''\n\nSELECT * FROM df INNER JOIN df_energy ON df.class=df_energy.class\n\n''')\n\ndf_join.show()"
  173. },
  174. {
  175. "execution_count": 20,
  176. "cell_type": "code",
  177. "metadata": {},
  178. "outputs": [],
  179. "source": "from pyspark.ml.feature import VectorAssembler\nfrom pyspark.ml.feature import Normalizer\n\nvectorAssembler = VectorAssembler(inputCols=[\"X\", \"Y\", \"Z\"],\n outputCol=\"features\")\nnormalizer = Normalizer(inputCol=\"features\", outputCol=\"features_norm\", p=1.0)\n"
  180. },
  181. {
  182. "execution_count": 21,
  183. "cell_type": "code",
  184. "metadata": {},
  185. "outputs": [],
  186. "source": "### Create linear regression model\nfrom pyspark.ml.regression import LinearRegression\n\nlr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)\n"
  187. },
  188. {
  189. "execution_count": 15,
  190. "cell_type": "code",
  191. "metadata": {},
  192. "outputs": [],
  193. "source": "from pyspark.ml import Pipeline\n\npipeline = Pipeline(stages=[vectorAssembler, normalizer, lr])"
  194. },
  195. {
  196. "execution_count": null,
  197. "cell_type": "code",
  198. "metadata": {},
  199. "outputs": [],
  200. "source": ""
  201. }
  202. ],
  203. "metadata": {
  204. "kernelspec": {
  205. "display_name": "Python 3.5 with Spark 2.1",
  206. "name": "python3-spark21",
  207. "language": "python"
  208. },
  209. "language_info": {
  210. "mimetype": "text/x-python",
  211. "nbconvert_exporter": "python",
  212. "version": "3.5.4",
  213. "name": "python",
  214. "file_extension": ".py",
  215. "pygments_lexer": "ipython3",
  216. "codemirror_mode": {
  217. "version": 3,
  218. "name": "ipython"
  219. }
  220. }
  221. },
  222. "nbformat": 4
  223. }
Add Comment
Please, Sign In to add comment