Advertisement
Guest User

Untitled

a guest
Jul 28th, 2017
44
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 7.60 KB | None | 0 0
  1. #include <iostream>
  2. #include <vector>
  3. #include <algorithm>
  4. #include <numeric>
  5. #include <tr1/array>
  6. #include <boost/mpl/size_t.hpp>
  7.  
  8. #include <boost/timer.hpp>
  9.  
  10. #include <boost/proto/proto.hpp>
  11. #include <boost/tuple/tuple.hpp>
  12.  
  13. namespace proto = boost::proto;
  14. namespace mpl = boost::mpl;
  15.  
  16. typedef boost::timer timer_type;
  17.  
  18. struct tuple_get : proto::callable
  19. {
  20.     template<typename Sig> struct result;
  21.  
  22.     // this overload seems to be needed
  23.     template<typename This, typename ArgNTag, typename Tuple>
  24.     struct result<This(ArgNTag, Tuple &)>
  25.     {  
  26.     // ArgNTag is an instance of argN_tag, which has a nested index typedef
  27.     typedef typename ArgNTag::index N;
  28.     // boost::tuples::element is a trait that returns the type of the Nth element
  29.     typedef typename boost::tuples::element<N::value, Tuple>::type type;
  30.     };  
  31.  
  32.     template<typename This, typename ArgNTag, typename Tuple>
  33.     struct result<This(ArgNTag const &, Tuple const &)>
  34.     {  
  35.     // ArgNTag is an instance of argN_tag, which has a nested index typedef
  36.     typedef typename ArgNTag::index N;
  37.     // boost::tuples::element is a trait that returns the type of the Nth element
  38.     typedef typename boost::tuples::element<N::value, Tuple>::type type;
  39.     };  
  40.  
  41.     template<typename ArgNTag, typename Tuple>
  42.     typename result<tuple_get(ArgNTag const &, Tuple const &)>::type
  43.     operator()(ArgNTag const &, Tuple const & tup) const
  44.     {  
  45.     typedef typename ArgNTag::index N;
  46.     // boost::get is a function that returns the Nth element of a tuple.
  47.     return boost::get<N::value>(tup);
  48.     }  
  49. };
  50.  
  51. namespace linear_algebra
  52. {
  53.     const size_t n = 512;
  54.  
  55.     typedef std::tr1::array< double , n > state_type;
  56.  
  57.     template<typename T > struct is_terminal : mpl::false_ {};
  58.     template<> struct is_terminal< state_type > : mpl::true_ {};
  59.     template<> struct is_terminal< double > : mpl::true_ {};
  60.  
  61.     template<typename Index> struct argN_tag { typedef Index index; };
  62.     template<typename Index> struct argN_term : proto::terminal< argN_tag<Index> > {};
  63.  
  64.     argN_term<mpl::size_t<0> >::type const arg1 = {};
  65.     argN_term<mpl::size_t<1> >::type const arg2 = {};
  66.  
  67.     BOOST_PROTO_DEFINE_OPERATORS( is_terminal , proto::default_domain )
  68.  
  69.     struct vector_context : proto::callable_context< vector_context const >
  70.     {
  71.         size_t m_i;
  72.  
  73.         vector_context( size_t i ) : m_i( i ) { }
  74.  
  75.         typedef double result_type;
  76.  
  77.         double operator()( proto::tag::terminal , state_type & arr ) const
  78.         {
  79.             return arr[ m_i ];
  80.         }
  81.     };
  82.  
  83.     using namespace boost::proto;
  84.  
  85.     // Take any expression and turn each node
  86.     // into a subscript expression, using the
  87.     // state as the RHS.
  88.     struct Distribute:
  89.       proto::or_<
  90.         when<terminal<state_type>, _make_subscript(_, _state)>,
  91.         when<terminal<argN_tag<_> >, _make_subscript(_, _state)>,
  92.         terminal<_>,
  93.         nary_expr<_, vararg<Distribute> >
  94.       >
  95.     {};
  96.  
  97.     struct Optimize:
  98.       or_<
  99.         when<
  100.  
  101.           subscript<Distribute, terminal<_> >,
  102.           Distribute(_left, _right)
  103.         >,
  104.         nary_expr<_, vararg<Optimize> >,
  105.         terminal<_>
  106.       >
  107.     {};
  108.  
  109.     struct EvalOpt:
  110.         when<Distribute, _default<>(Optimize)>
  111.     {};
  112.  
  113.     struct EvalOpt2:
  114.       or_<
  115.         when< terminal< argN_tag< _ > >
  116.         , tuple_get(_value(_), _state)
  117.         >,
  118.         otherwise< _default< EvalOpt2>( Optimize(proto::_), proto::_state) >
  119.       >
  120.     {};
  121. }
  122.  
  123. template< typename Expr >
  124. void assign_proto( linear_algebra::state_type &x , Expr const & expr ) __attribute__((noinline));
  125.  
  126. template< typename Expr >
  127. void assign_proto( linear_algebra::state_type &x , Expr const & expr )
  128. {
  129.     using namespace linear_algebra;
  130.     for( size_t i=0 ; i<n ; ++i )
  131.     {
  132.         vector_context ctx( i );
  133.         x[i] = proto::eval( expr , ctx );
  134.     }
  135. }
  136.  
  137. template< typename Expr >
  138. void assign_proto_trans( linear_algebra::state_type &x, const Expr& expr) __attribute__((noinline));
  139.  
  140. template< typename Expr >
  141. void assign_proto_trans( linear_algebra::state_type &x , const Expr& expr)
  142. {
  143.     using namespace linear_algebra;
  144.  
  145.     for( size_t i=0 ; i<n ; ++i )
  146.     {
  147.         x[i] = EvalOpt()( expr[i] );
  148.     }
  149. }
  150.  
  151. template< typename Expr >
  152. void assign_proto_trans2( linear_algebra::state_type &x,
  153.                           linear_algebra::state_type const& x1,
  154.                           linear_algebra::state_type const& x2,
  155.                           const Expr& expr) __attribute__((noinline));
  156.  
  157. template< typename Expr >
  158. void assign_proto_trans2( linear_algebra::state_type &x ,
  159.                           linear_algebra::state_type const& x1,
  160.                           linear_algebra::state_type const& x2,
  161.                           const Expr& expr)
  162. {
  163.     using namespace linear_algebra;
  164.  
  165.     for( size_t i=0 ; i<n ; ++i )
  166.     {
  167.         // notice boost::ref here, its important
  168.         x[i] = EvalOpt2()( expr[i], boost::make_tuple(boost::ref(x1),boost::ref(x2)) );
  169.     }
  170. }
  171.  
  172. void assign_full( linear_algebra::state_type &x3 ,
  173.                   const linear_algebra::state_type &x1 ,
  174.                   const linear_algebra::state_type &x2 ) __attribute__((noinline));
  175.  
  176. void assign_full( linear_algebra::state_type &x3,
  177.                   linear_algebra::state_type const &x1,
  178.                   linear_algebra::state_type const &x2 )
  179. {
  180.     for( size_t i=0 ; i<linear_algebra::n ; ++i )
  181.         x3[i] = x1[i] + x2[i] * 2.0 + x1[i] * 10.0 + 2.0 * ( x1[i] + 3.0 * x2[i] );
  182. }
  183.  
  184. int main( int argc , char **argv )
  185. {
  186.     using namespace linear_algebra;
  187.  
  188.     std::clog << n << std::endl;
  189.  
  190.     const size_t num_of_steps = 1000000;
  191.  
  192.     double t1 = 0.0 , t2 = 0.0, t3 = 0.0, t4 = 0.0;
  193.     size_t count = 0;
  194.     timer_type timer;
  195.  
  196.     while( true )
  197.     {
  198.         ++count;
  199.         state_type x1 , x2 , x3_1 , x3_2, x3_3, x3_4;
  200.         std::fill( x3_1.begin() , x3_1.end() , 0.0 );
  201.         std::fill( x3_2.begin() , x3_2.end() , 0.0 );
  202.         std::fill( x3_3.begin() , x3_3.end() , 0.0 );
  203.         std::fill( x3_4.begin() , x3_4.end() , 0.0 );
  204.  
  205.         for( size_t i=0 ; i<n ; ++i )
  206.         {
  207.             x1[i] = drand48() - 0.5;
  208.             x2[i] = drand48() - 0.5;
  209.         }
  210.  
  211.         timer.restart();
  212.         for( size_t i=0 ; i<num_of_steps ; ++i )
  213.             assign_proto( x3_1 , x1 + x2 * 2.0 + x1 * 10.0 + 2.0 * ( x1 + 3.0 * x2 ) );
  214.         t1 += timer.elapsed();
  215.  
  216.         timer.restart();
  217.         for( size_t i=0 ; i<num_of_steps ; ++i )
  218.             assign_full( x3_2 , x1 , x2 );
  219.         t2 += timer.elapsed();
  220.  
  221.         timer.restart();
  222.         for( size_t i=0 ; i<num_of_steps ; ++i )
  223.             assign_proto_trans( x3_3 , x1 + x2 * 2.0 + x1 * 10.0 + 2.0 * ( x1 + 3.0 * x2 ) );
  224.         t3 += timer.elapsed();
  225.  
  226.         timer.restart();
  227.         for( size_t i=0 ; i<num_of_steps ; ++i )
  228.             assign_proto_trans2( x3_4, x1, x2, arg1 + arg2*2.0 + arg1 * 10.0 + 2.0 * (arg1 + 3.0*arg2));
  229.         t4 += timer.elapsed();
  230.  
  231.  
  232.         std::clog.precision( 8 );
  233.         std::clog.width( 10 );
  234.         std::clog << count << " " << t1 / double( count )
  235.                   << " " << t3 / double( count )
  236.                   << " " << t4 / double( count )
  237.                   << " " << t2 / double( count ) << " ";
  238.  
  239.         std::clog << std::accumulate( x3_1.begin() , x3_1.end() , 0.0 ) << " ";
  240.         std::clog << std::accumulate( x3_3.begin() , x3_3.end() , 0.0 ) << " ";
  241.         std::clog << std::accumulate( x3_4.begin() , x3_4.end() , 0.0 ) << " ";
  242.         std::clog << std::accumulate( x3_2.begin() , x3_2.end() , 0.0 ) << std::endl;
  243.     }
  244.  
  245.     return 0;
  246. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement