Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <vector>
- #include <algorithm>
- #include <numeric>
- #include <tr1/array>
- #include <boost/mpl/size_t.hpp>
- #include <boost/timer.hpp>
- #include <boost/proto/proto.hpp>
- #include <boost/tuple/tuple.hpp>
- namespace proto = boost::proto;
- namespace mpl = boost::mpl;
- typedef boost::timer timer_type;
- struct tuple_get : proto::callable
- {
- template<typename Sig> struct result;
- // this overload seems to be needed
- template<typename This, typename ArgNTag, typename Tuple>
- struct result<This(ArgNTag, Tuple &)>
- {
- // ArgNTag is an instance of argN_tag, which has a nested index typedef
- typedef typename ArgNTag::index N;
- // boost::tuples::element is a trait that returns the type of the Nth element
- typedef typename boost::tuples::element<N::value, Tuple>::type type;
- };
- template<typename This, typename ArgNTag, typename Tuple>
- struct result<This(ArgNTag const &, Tuple const &)>
- {
- // ArgNTag is an instance of argN_tag, which has a nested index typedef
- typedef typename ArgNTag::index N;
- // boost::tuples::element is a trait that returns the type of the Nth element
- typedef typename boost::tuples::element<N::value, Tuple>::type type;
- };
- template<typename ArgNTag, typename Tuple>
- typename result<tuple_get(ArgNTag const &, Tuple const &)>::type
- operator()(ArgNTag const &, Tuple const & tup) const
- {
- typedef typename ArgNTag::index N;
- // boost::get is a function that returns the Nth element of a tuple.
- return boost::get<N::value>(tup);
- }
- };
- namespace linear_algebra
- {
- const size_t n = 512;
- typedef std::tr1::array< double , n > state_type;
- template<typename T > struct is_terminal : mpl::false_ {};
- template<> struct is_terminal< state_type > : mpl::true_ {};
- template<> struct is_terminal< double > : mpl::true_ {};
- template<typename Index> struct argN_tag { typedef Index index; };
- template<typename Index> struct argN_term : proto::terminal< argN_tag<Index> > {};
- argN_term<mpl::size_t<0> >::type const arg1 = {};
- argN_term<mpl::size_t<1> >::type const arg2 = {};
- BOOST_PROTO_DEFINE_OPERATORS( is_terminal , proto::default_domain )
- struct vector_context : proto::callable_context< vector_context const >
- {
- size_t m_i;
- vector_context( size_t i ) : m_i( i ) { }
- typedef double result_type;
- double operator()( proto::tag::terminal , state_type & arr ) const
- {
- return arr[ m_i ];
- }
- };
- using namespace boost::proto;
- // Take any expression and turn each node
- // into a subscript expression, using the
- // state as the RHS.
- struct Distribute:
- proto::or_<
- when<terminal<state_type>, _make_subscript(_, _state)>,
- when<terminal<argN_tag<_> >, _make_subscript(_, _state)>,
- terminal<_>,
- nary_expr<_, vararg<Distribute> >
- >
- {};
- struct Optimize:
- or_<
- when<
- subscript<Distribute, terminal<_> >,
- Distribute(_left, _right)
- >,
- nary_expr<_, vararg<Optimize> >,
- terminal<_>
- >
- {};
- struct EvalOpt:
- when<Distribute, _default<>(Optimize)>
- {};
- struct EvalOpt2:
- or_<
- when< terminal< argN_tag< _ > >
- , tuple_get(_value(_), _state)
- >,
- otherwise< _default< EvalOpt2>( Optimize(proto::_), proto::_state) >
- >
- {};
- }
- template< typename Expr >
- void assign_proto( linear_algebra::state_type &x , Expr const & expr ) __attribute__((noinline));
- template< typename Expr >
- void assign_proto( linear_algebra::state_type &x , Expr const & expr )
- {
- using namespace linear_algebra;
- for( size_t i=0 ; i<n ; ++i )
- {
- vector_context ctx( i );
- x[i] = proto::eval( expr , ctx );
- }
- }
- template< typename Expr >
- void assign_proto_trans( linear_algebra::state_type &x, const Expr& expr) __attribute__((noinline));
- template< typename Expr >
- void assign_proto_trans( linear_algebra::state_type &x , const Expr& expr)
- {
- using namespace linear_algebra;
- for( size_t i=0 ; i<n ; ++i )
- {
- x[i] = EvalOpt()( expr[i] );
- }
- }
- template< typename Expr >
- void assign_proto_trans2( linear_algebra::state_type &x,
- linear_algebra::state_type const& x1,
- linear_algebra::state_type const& x2,
- const Expr& expr) __attribute__((noinline));
- template< typename Expr >
- void assign_proto_trans2( linear_algebra::state_type &x ,
- linear_algebra::state_type const& x1,
- linear_algebra::state_type const& x2,
- const Expr& expr)
- {
- using namespace linear_algebra;
- for( size_t i=0 ; i<n ; ++i )
- {
- // notice boost::ref here, its important
- x[i] = EvalOpt2()( expr[i], boost::make_tuple(boost::ref(x1),boost::ref(x2)) );
- }
- }
- void assign_full( linear_algebra::state_type &x3 ,
- const linear_algebra::state_type &x1 ,
- const linear_algebra::state_type &x2 ) __attribute__((noinline));
- void assign_full( linear_algebra::state_type &x3,
- linear_algebra::state_type const &x1,
- linear_algebra::state_type const &x2 )
- {
- for( size_t i=0 ; i<linear_algebra::n ; ++i )
- x3[i] = x1[i] + x2[i] * 2.0 + x1[i] * 10.0 + 2.0 * ( x1[i] + 3.0 * x2[i] );
- }
- int main( int argc , char **argv )
- {
- using namespace linear_algebra;
- std::clog << n << std::endl;
- const size_t num_of_steps = 1000000;
- double t1 = 0.0 , t2 = 0.0, t3 = 0.0, t4 = 0.0;
- size_t count = 0;
- timer_type timer;
- while( true )
- {
- ++count;
- state_type x1 , x2 , x3_1 , x3_2, x3_3, x3_4;
- std::fill( x3_1.begin() , x3_1.end() , 0.0 );
- std::fill( x3_2.begin() , x3_2.end() , 0.0 );
- std::fill( x3_3.begin() , x3_3.end() , 0.0 );
- std::fill( x3_4.begin() , x3_4.end() , 0.0 );
- for( size_t i=0 ; i<n ; ++i )
- {
- x1[i] = drand48() - 0.5;
- x2[i] = drand48() - 0.5;
- }
- timer.restart();
- for( size_t i=0 ; i<num_of_steps ; ++i )
- assign_proto( x3_1 , x1 + x2 * 2.0 + x1 * 10.0 + 2.0 * ( x1 + 3.0 * x2 ) );
- t1 += timer.elapsed();
- timer.restart();
- for( size_t i=0 ; i<num_of_steps ; ++i )
- assign_full( x3_2 , x1 , x2 );
- t2 += timer.elapsed();
- timer.restart();
- for( size_t i=0 ; i<num_of_steps ; ++i )
- assign_proto_trans( x3_3 , x1 + x2 * 2.0 + x1 * 10.0 + 2.0 * ( x1 + 3.0 * x2 ) );
- t3 += timer.elapsed();
- timer.restart();
- for( size_t i=0 ; i<num_of_steps ; ++i )
- assign_proto_trans2( x3_4, x1, x2, arg1 + arg2*2.0 + arg1 * 10.0 + 2.0 * (arg1 + 3.0*arg2));
- t4 += timer.elapsed();
- std::clog.precision( 8 );
- std::clog.width( 10 );
- std::clog << count << " " << t1 / double( count )
- << " " << t3 / double( count )
- << " " << t4 / double( count )
- << " " << t2 / double( count ) << " ";
- std::clog << std::accumulate( x3_1.begin() , x3_1.end() , 0.0 ) << " ";
- std::clog << std::accumulate( x3_3.begin() , x3_3.end() , 0.0 ) << " ";
- std::clog << std::accumulate( x3_4.begin() , x3_4.end() , 0.0 ) << " ";
- std::clog << std::accumulate( x3_2.begin() , x3_2.end() , 0.0 ) << std::endl;
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement