freetype-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Git][freetype/freetype][gsoc-anurag-2023] Speed-up SIMD


From: Anurag Thakur (@AdbhutDev)
Subject: [Git][freetype/freetype][gsoc-anurag-2023] Speed-up SIMD
Date: Wed, 04 Oct 2023 22:17:31 +0000

Anurag Thakur pushed to branch gsoc-anurag-2023 at FreeType / FreeType

Commits:

  • 66375b44
    by Anurag Thakur at 2023-10-05T03:46:59+05:30
    Speed-up SIMD
    

2 changed files:

Changes:

  • src/base/ftobjs.c
    ... ... @@ -3154,7 +3154,7 @@ int conic_to2(FT_GlyphSlot* slot, FT_Vector *control, FT_Vector *from, FT_Vector
    3154 3154
           face->garray = (FT_GlyphSlot*)malloc(
    
    3155 3155
               face->driver->clazz->slot_object_size * face->num_glyphs );
    
    3156 3156
           //error           = FT_Set_Char_Size( face, 0, 160 * 64, 300, 300 );
    
    3157
    -      error           = FT_Set_Pixel_Sizes( face, 0, 50);
    
    3157
    +      error           = FT_Set_Pixel_Sizes( face, 0, 500);
    
    3158 3158
           // int glyph_index = FT_Get_Char_Index( face, 'A' );
    
    3159 3159
           // error           = FT_Load_Glyph( face, glyph_index, FT_LOAD_NO_HINTING );
    
    3160 3160
     
    

  • src/dense/ftdense.c
    ... ... @@ -454,6 +454,7 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target, FT_PreLine pl
    454 454
     
    
    455 455
     __m128i offset = _mm_setzero_si128();
    
    456 456
       __m128i mask   = _mm_set1_epi32( 0x0c080400 );
    
    457
    +  __m128i nzero = _mm_castps_si128(_mm_set1_ps(-0.0));
    
    457 458
     
    
    458 459
       for (int i = 0; i < worker->m_h*worker->m_w; i += 4)
    
    459 460
       {
    
    ... ... @@ -463,18 +464,31 @@ __m128i offset = _mm_setzero_si128();
    463 464
     
    
    464 465
         x = _mm_add_epi32( x, _mm_slli_si128( x, 4 ) );
    
    465 466
     
    
    466
    -    x = _mm_add_epi32(
    
    467
    -        x, _mm_castps_si128( _mm_shuffle_ps( _mm_setzero_ps(),
    
    468
    -                                             _mm_castsi128_ps( x ), 0x40 ) ) );
    
    467
    +    // x = _mm_add_epi32(
    
    468
    +    //     x, _mm_castps_si128( _mm_shuffle_ps( _mm_setzero_ps(),
    
    469
    +    //                                          _mm_castsi128_ps( x ), 0x40 ) ) );
    
    470
    +    x = _mm_add_epi32(x, _mm_slli_si128(x,8));
    
    469 471
     
    
    470 472
         // add the prefsum of previous 4 floats to all current floats
    
    471 473
         x = _mm_add_epi32( x, offset );
    
    472 474
     
    
    475
    +
    
    476
    +
    
    477
    +    // __m128 y = _mm_mul_ps(_mm_castsi128_ps(x), _mm_set1_ps(255.9));
    
    478
    +
    
    479
    +    // y = _mm_andnot_ps(_mm_castsi128_ps(nzero), y);
    
    480
    +
    
    481
    +    // __m128i z = _mm_cvttps_epi32(y);
    
    482
    +    // z = _mm_packus_epi16(_mm_packs_epi32(z, nzero), nzero);
    
    483
    +
    
    484
    +    // *((int*)dest+i) = _mm_extract_epi16(z, 0);
    
    485
    +
    
    486
    +
    
    473 487
         // take absolute value
    
    474 488
         __m128i y = _mm_abs_epi32( x );  // fabs(x)
    
    475 489
     
    
    476 490
         // cap max value to 1
    
    477
    -    y = _mm_min_epi32( y, _mm_set1_epi32( 4080 ) );
    
    491
    +    y = _mm_min_epi32( y, _mm_set1_epi32( 4094 ) );
    
    478 492
     
    
    479 493
         // reduce to 255
    
    480 494
         y = _mm_srli_epi32( y, 4 );
    
    ... ... @@ -488,6 +502,7 @@ __m128i offset = _mm_setzero_si128();
    488 502
         offset = _mm_castps_si128( _mm_shuffle_ps( _mm_castsi128_ps( x ),
    
    489 503
                                                    _mm_castsi128_ps( x ),
    
    490 504
                                                    _MM_SHUFFLE( 3, 3, 3, 3 ) ) );
    
    505
    +    //offset = _mm_set1_epi32(_mm_extract_epi32(x, 3));
    
    491 506
       }
    
    492 507
     
    
    493 508
     #else /* FT_SSE4_1 */
    


  • reply via email to

    [Prev in Thread] Current Thread [Next in Thread]