Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "path_inputs = \"/data/master/finance/xrro/data/\"\n",
- "usuario = \"E017542\"\n",
- "b1_name = \"t_xrro_operation_attributes\"\n",
- "b2_name = \"t_xrro_customer_attributes\"\n",
- "b3_name = \"t_xrro_financial_metrics_ml\"\n",
- "b4_name = \"t_xrro_economic_metrics_ml\"\n",
- "b5_name = \"t_xrro_regulatory_metrics_ml\"\n",
- "b7_name = \"t_xrro_prospective_metrics_ml\"\n",
- "primary_key = [ \"main_operation_id\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "df_B1 = spark.read.parquet(path_inputs + b1_name)\n",
- "df_B2 = spark.read.parquet(path_inputs + b2_name)\n",
- "df_B3 = spark.read.parquet(path_inputs + b3_name)\n",
- "df_B4 = spark.read.parquet(path_inputs + b4_name)\n",
- "df_B5 = spark.read.parquet(path_inputs + b5_name)\n",
- "df_B7 = spark.read.parquet(path_inputs + b7_name)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "\n",
- "#Dataset1 prospectivo\n",
- "\n",
- "\"\"\"\n",
- "b1_list = primary_key + primary_key_2 + [\"cutoff_date\",\"main_efan_area_id\",\"operation_holding_situation_id\",\"block_epigraph_type\"]\n",
- "b2_list = primary_key_2 + [\"local_commercial_segment_id\"]\n",
- "b7_list = primary_key + [\"cust_nii_stk_lc_amount\", \"fundg_nii_stk_lc_amount\", \"net_fee_stk_lc_amount\",\\\n",
- " \"rof_stk_lc_amount\",\"tax_stk_lc_amount\"]\n",
- "b4_list = primary_key + [\"ek_crr_eop_lc_amount\", \"ek_or_eop_lc_amount\",\"ec_el_ttc_eop_lc_amount\"]\n",
- "b5_list = primary_key + [\"rk_crr_eop_lc_amount\", \"rk_or_eop_lc_amount\"]\n",
- "df_B1_select = df_B1.select(b1_list)\n",
- "df_B2_select = df_B2.select(b2_list)\n",
- "df_B7_select = df_B7.select(b7_list)\n",
- "df_B4_select = df_B4.select(b4_list)\n",
- "df_B5_select = df_B5.select(b5_list)\n",
- "cruce1 = df_B7_select.join(df_B1_select, on = primary_key, how = \"left\")\n",
- "cruce2 = cruce1.join(df_B4_select, on = primary_key, how = \"left\")\n",
- "cruce3 = cruce2.join(df_B5_select, on = primary_key, how = \"left\")\n",
- "cruce3.toPandas().to_csv(\"Prueba1.txt\",sep=';',header=True,index=False,mode='w',decimal=',')\n",
- "#list_filter = [\"CE0061235\"]\n",
- "#salida_filtrada = cruce3.filter(cruce3[\"main_efan_area_id\"].isin(list_filter))\n",
- "#salida_filtrada.toPandas().to_csv(\"salida_filtrada.txt\",sep=';',header=True,index=False,mode='w',decimal=',')\n",
- "#cruce3.groupBy([\"cutoff_date\",\"main_efan_area_id\",\"block_epigraph_type\",\"operation_holding_situation_id\"]).sum().toPandas()\n",
- "\"\"\"\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "#GF = 93\n",
- "#GTB= 94\n",
- "\n",
- "Producto=[\"93\"]\n",
- "Fecha=[\"2019-04-30\"]\n",
- "B1_Filtrado=df_B1.filter(df_B1[\"operation_repository_id\"].isin(Producto)).filter(df_B1[\"cutoff_date\"].isin(Fecha))\n",
- "B3_Filtrado=df_B3.filter(df_B3[\"operation_repository_id\"].isin(Producto)).filter(df_B3[\"cutoff_date\"].isin(Fecha))\n",
- "B4_Filtrado=df_B4.filter(df_B4[\"operation_repository_id\"].isin(Producto)).filter(df_B4[\"cutoff_date\"].isin(Fecha))\n",
- "B5_Filtrado=df_B5.filter(df_B5[\"operation_repository_id\"].isin(Producto)).filter(df_B5[\"cutoff_date\"].isin(Fecha))\n",
- "B7_Filtrado=df_B7.filter(df_B7[\"operation_repository_id\"].isin(Producto)).filter(df_B7[\"cutoff_date\"].isin(Fecha))\n",
- "\n",
- "#B7_max_odate = [str(row['odate_date']) for row in B7_Filtrado.select('odate_date').distinct().collect()]\n",
- "#B7_Filtrado_max_odate= B7_Filtrado.filter(B7_Filtrado[\"odate_date\"].isin(B7_max_odate))\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [],
- "source": [
- "#Dataset1 prospectivo 2\n",
- "\n",
- "b1_list = primary_key + [\"cutoff_date\",\"main_efan_area_id\",\"operation_holding_situation_id\",\"block_epigraph_type\"]\n",
- "b4_list = primary_key + [\"ek_crr_eop_lc_amount\", \"ek_or_eop_lc_amount\",\"ec_el_ttc_eop_lc_amount\"]\n",
- "b5_list = primary_key + [\"rk_crr_eop_lc_amount\", \"rk_or_eop_lc_amount\"]\n",
- "df_B1_select = B1_Filtrado.select(b1_list)\n",
- "df_B4_select = B4_Filtrado.select(b4_list)\n",
- "df_B5_select = B5_Filtrado.select(b5_list)\n",
- "cruce1 = B7_Filtrado.drop('cutoff_date').join(df_B1_select, on = primary_key, how = \"inner\")\n",
- "cruce2 = cruce1.join(df_B4_select, on = primary_key, how = \"inner\")\n",
- "cruce3 = cruce2.join(df_B5_select, on = primary_key, how = \"inner\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [],
- "source": [
- "cruce3.toPandas().to_csv(\"Abril.txt\",sep=';',header=True,index=False,mode='w',decimal=',')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "583"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "cruce1.count()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[Row(operation_repository_id='93')]"
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "B7_Filtrado.select('operation_repository_id').distinct().collect()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "583"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "B1_Filtrado.dropDuplicates().count()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "local1-spark2python3",
- "language": "python",
- "name": "spark-python-local1-spark2python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement