FAQ

Q: What's this?
A: Dependency-free Mastodon full-text search using PostgreSQL text search.

Q: Who can search what?
A: Logged-in users can search all public posts the server has seen.

Q: How do I install it?
A: $ patch -p1 < mastodon-search.patch
   $ RAILS_ENV=production bin/rails db:migrate
   $ RAILS_ENV=production bin/rails console
   > Status.reindex_all_for_search

Q: How long will reindexing take?
A: A slow server does about ten thousand statuses per minute.

Q: Does this work with Mastodon forks?
A: Probably. It was built against Glitch and appears to work fine with stock
   Mastodon, but verify correctness yourself before running random code from
   anonymous strangers on production servers.

Q: Gargron says full-text search enables negative social dynamics. Is this a
   good idea?
A: I think it is. Letting strangers talk to each other over the internet
   enables negative social dynamics, but if you're using Mastodon you've
   evidently decided it's a net win. Search helps people find others with
   common interests, and helps server admins find content/people they want to
   remove.

Q: Why is this anonymous?
A: A few people who don't like search get really mad about it. I don't want to
   deal with the drama.
   https://cathode.church/fedi-scraper-counter.html

--- a/app/models/status.rb
+++ b/app/models/status.rb
@@ -116,6 +116,9 @@ class Status < ApplicationRecord
 
   scope :not_local_only, -> { where(local_only: [false, nil]) }
 
+  # Full-text match against the tsvector column using web-search query syntax.
+  scope :search_for, ->(q) { where("tsvector @@ websearch_to_tsquery(?)", q) }
+
   cache_associated :application,
                    :media_attachments,
                    :conversation,
@@ -147,6 +150,9 @@ class Status < ApplicationRecord
 
     ids << account_id if local?
 
+    # Public posts are searchable by every local account.
+    ids += Account.local.pluck(:id) if visibility == "public"
+
     if preloaded.nil?
       ids += mentions.joins(:account).merge(Account.local).active.pluck(:account_id)
       ids += favourites.joins(:account).merge(Account.local).pluck(:account_id)
@@ -324,6 +330,8 @@ class Status < ApplicationRecord
 
   around_create Mastodon::Snowflake::Callbacks
 
+  before_save :index_for_search
+
   after_create :set_poll_id
 
   class << self
@@ -503,6 +511,32 @@ class Status < ApplicationRecord
     update_attribute(:deleted_at, discard_time)
   end
 
+  # Builds the search vector from searchable_text and assigns it to the
+  # tsvector attribute without saving (runs as a before_save callback).
+  # Returns the tsvector value computed by PostgreSQL.
+  def index_for_search
+    # [[:word:]] is Unicode-aware; Ruby's \w is ASCII-only and would blank out
+    # every non-Latin post before it reached the index.
+    search_string = searchable_text.to_s.downcase.gsub(/[^[:word:]]/, ' ')
+    connection = self.class.connection
+    self.tsvector = connection.select_value("SELECT to_tsvector(#{connection.quote(search_string)})")
+  end
+
+  # Recomputes and persists the search vector of an already-saved status.
+  # update_column skips callbacks, so before_save does not index a second time.
+  # Best-effort: a failure is logged and nil is returned.
+  def reindex_for_search
+    update_column(:tsvector, index_for_search)
+  rescue StandardError => e
+    Rails.logger.warn("Search reindex failed for status #{id}: #{e.class}: #{e.message}")
+    nil
+  end
+
+  # Reindexes every public status in batches; run once after installing.
+  def self.reindex_all_for_search
+    Status.where(visibility: 'public').in_batches.each_record(&:reindex_for_search)
+  end
+
   def unlink_from_conversations!
     return unless direct_visibility?
 
--- a/app/services/search_service.rb
+++ b/app/services/search_service.rb
@@ -35,6 +35,9 @@ class SearchService < BaseService
   end
 
   def perform_statuses_search!
+    # Without Elasticsearch, fall back to the PostgreSQL full-text index.
+    return Status.where(visibility: 'public').search_for(@query).offset(@offset).limit(@limit) unless Chewy.enabled?
+
     definition = parsed_query.apply(StatusesIndex.filter(term: { searchable_by: @account.id }))
 
     definition = definition.filter(term: { account_id: @options[:account_id] }) if @options[:account_id].present?
@@ -86,7 +89,5 @@ class SearchService < BaseService
   end
 
   def full_text_searchable?
-    return false unless Chewy.enabled?
-
     statuses_search? && !@account.nil? && !((@query.start_with?('#') || @query.include?('@')) && !@query.include?(' '))
   end
 
new file mode 100644
--- /dev/null
+++ b/db/migrate/20230226185028_add_tsvector_to_statuses.rb
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+class AddTsvectorToStatuses < ActiveRecord::Migration[6.1]
+  # Concurrent index builds cannot run inside a transaction.
+  disable_ddl_transaction!
+
+  def change
+    add_column :statuses, :tsvector, :tsvector
+    add_index :statuses, :tsvector, using: :gin, algorithm: :concurrently
+  end
+end