package phrasestream

import (
	"bytes"
	"io"
	"slices"
	"sort"
	"strings"
	"testing"
)

// maskingTest is one table-driven masking case. It is consumed by TestMasking
// and also encoded into a fuzz seed by convertMaskingTestsToFuzzInput.
//
// input is the data fed to the stream; both consumers split it on '|', and
// TestMasking issues one Write call per piece. phrases are registered before
// any data is written. expected is the full output TestMasking requires once
// the stream has been closed.
type maskingTest struct {
	input    string
	phrases  []string
	expected string
}

// maskingTests is the shared case table: TestMasking runs every entry and
// convertMaskingTestsToFuzzInput turns every entry into a fuzz seed.
//
// A '|' in input is a chunk separator, not data: both consumers split the
// input on it, so it is never written to the stream. Some input strings
// appear more than once with different phrase sets.
var maskingTests = []maskingTest{
	{
		input:    "0xx1",
		phrases:  []string{"xx200000000000000000000000000000", "xx000", "x"},
		expected: "0[MASKED][MASKED]1",
	},
	{
		input:    "xx0xxxxx0",
		phrases:  []string{"x0xxxxx", "xx10000", "xx20000"},
		expected: "x[MASKED]0",
	},
	{
		input:    "x|xxx",
		phrases:  []string{"xxx"},
		expected: "[MASKED]x",
	},
	{
		input:    "bx|bxxx",
		phrases:  []string{"bxxx"},
		expected: "bx[MASKED]",
	},
	{
		input:    "bx|bxxx",
		phrases:  []string{"bxxx", "bx"},
		expected: "[MASKED][MASKED]",
	},
	{
		input:    "bxxxxxxxxxxxxxxxxxxxxx",
		phrases:  []string{"bxxxxxxxxxxxxxxxxxxxxxx", "xx"},
		expected: "b[MASKED][MASKED][MASKED][MASKED][MASKED][MASKED][MASKED][MASKED][MASKED][MASKED]x",
	},
	{
		input: "0000|0",
		phrases: []string{
			"000010000000000000000000000000000000000000000000",
			"00",
		},
		expected: "[MASKED][MASKED]0",
	},
	{
		input: "secrets|0",
		phrases: []string{
			"secretstuff",
			"secret",
		},
		expected: "[MASKED]s0",
	},
	{
		input:    "\xf6\xf6\xf6\xf6",
		phrases:  []string{"\xf6\xf6\xf600000", "\xf6\xf6"},
		expected: "[MASKED][MASKED]",
	},
	{
		input:    "0",
		phrases:  []string{"000"},
		expected: "0",
	},
	{
		input:    "00hhh",
		phrases:  []string{"hhh00", "hh"},
		expected: "00[MASKED]h",
	},
	{
		input:    "0000",
		phrases:  []string{"00000000", "00"},
		expected: "[MASKED][MASKED]",
	},
	{
		input:    "k00",
		phrases:  []string{"00", "k0"},
		expected: "[MASKED]0",
	},
	{
		input:    "ss|k",
		phrases:  []string{"s}", "k\x01"},
		expected: "ssk",
	},
	{
		input:    "t simples",
		phrases:  []string{"simple", "sample", "stumple", "test"},
		expected: "t [MASKED]s",
	},
	{
		input:    "empty secrets have no affect",
		phrases:  []string{""},
		expected: "empty secrets have no affect",
	},
	{
		input:    "p|l|e|a|s|e",
		phrases:  []string{"play", "plonk", "plink", "pleas"},
		expected: "[MASKED]e",
	},
	{
		input:    "no escaping at all",
		expected: "no escaping at all",
	},
	{
		input:    "fooo|baz",
		phrases:  []string{"fooobar", "ooo", "bazzz"},
		expected: "f[MASKED]baz",
	},
	{
		input:    "fooo|baz|ba|z",
		phrases:  []string{"fooobar", "fooocar", "fo", "bar", "zbaz"},
		expected: "[MASKED]ooba[MASKED]",
	},
	{
		input:    "fooo|bazz",
		phrases:  []string{"fooobar", "fooocar", "fo"},
		expected: "[MASKED]oobazz",
	},
	{
		input:    "fooo|baz|",
		phrases:  []string{"fooobar", "fooocar", "fo"},
		expected: "[MASKED]oobaz",
	},
	{
		input:    "fooo|baz",
		phrases:  []string{"fooobar", "ooo"},
		expected: "f[MASKED]baz",
	},
	{
		input:    "secrets",
		phrases:  []string{"secrets"},
		expected: "[MASKED]",
	},
	{
		input:    "hello secret|s",
		phrases:  []string{"secrets"},
		expected: "hello [MASKED]",
	},
	{
		input:    "s|ecrets",
		phrases:  []string{"secrets"},
		expected: "[MASKED]",
	},
	{
		input:    "secretssecrets",
		phrases:  []string{"secrets"},
		expected: "[MASKED][MASKED]",
	},
	{
		input:    "ssecrets",
		phrases:  []string{"secrets"},
		expected: "s[MASKED]",
	},
	{
		input:    "s|secrets",
		phrases:  []string{"secrets"},
		expected: "s[MASKED]",
	},
	{
		input:    "at the start of the buffer",
		phrases:  []string{"at"},
		expected: "[MASKED] the start of the buffer",
	},
	{
		input:    "in the middle of the buffer",
		phrases:  []string{"middle"},
		expected: "in the [MASKED] of the buffer",
	},
	{
		input:    "at the end of the buffer",
		phrases:  []string{"buffer"},
		expected: "at the end of the [MASKED]",
	},
	{
		input:    "all values are masked",
		phrases:  []string{"all", "values", "are", "masked"},
		expected: "[MASKED] [MASKED] [MASKED] [MASKED]",
	},
	{
		input:    "prefixed and suffixed: xfoox ybary ffoo barr ffooo bbarr",
		phrases:  []string{"foo", "bar"},
		expected: "prefixed and suffixed: x[MASKED]x y[MASKED]y f[MASKED] [MASKED]r f[MASKED]o b[MASKED]r",
	},
	{
		input:    "prefix|ed, su|ffi|xed |and split|:| xfo|ox y|bary ffo|o ba|rr ffooo b|barr",
		phrases:  []string{"foo", "bar"},
		expected: "prefixed, suffixed and split: x[MASKED]x y[MASKED]y f[MASKED] [MASKED]r f[MASKED]o b[MASKED]r",
	},
	{
		input:    "sp|lit al|l val|ues ar|e |mask|ed",
		phrases:  []string{"split", "all", "values", "are", "masked"},
		expected: "[MASKED] [MASKED] [MASKED] [MASKED] [MASKED]",
	},
	{
		input:    "prefix_mask mask prefix_|mask prefix_ma|sk mas|k",
		phrases:  []string{"mask", "prefix_mask"},
		expected: "[MASKED] [MASKED] [MASKED] [MASKED] [MASKED]",
	},
	{
		input:    "overlap: this is the en| foobar",
		phrases:  []string{"this is the end", "en foobar", "en"},
		expected: "overlap: this is the [MASKED]",
	},
	{
		input:    "overlapping: ffoo barr",
		phrases:  []string{"foo", "bar", "foo bar"},
		expected: "overlapping: f[MASKED]r",
	},
	{
		input:    "overlapping: ffoo bars",
		phrases:  []string{"foo", "bar", "foo bar"},
		expected: "overlapping: f[MASKED]s",
	},
	{
		input:    "overlapping: afoo barr",
		phrases:  []string{"foo", "bar", "foo bar"},
		expected: "overlapping: a[MASKED]r",
	},

	{
		input:    "overlapping patterns: foofoobar",
		phrases:  []string{"foo", "foobar"},
		expected: "overlapping patterns: [MASKED][MASKED]",
	},
	{
		input:    "partial match interrupt: fo|ofo|o",
		phrases:  []string{"foo", "foofoo"},
		expected: "partial match interrupt: [MASKED]",
	},
	{
		input:    "f|o|o|b|a|r",
		phrases:  []string{"foobar", "foo"},
		expected: "[MASKED]",
	},
	{
		input:    "multiple matches: foo foobar foobarbaz",
		phrases:  []string{"foo", "foobar", "foobarbaz"},
		expected: "multiple matches: [MASKED] [MASKED] [MASKED]",
	},
	{
		input:    "interrupted near-match: fooba|x|foobar",
		phrases:  []string{"foobar"},
		expected: "interrupted near-match: foobax[MASKED]",
	},
	{
		input:    "fo|ob|ar|fo|ox",
		phrases:  []string{"foobar", "foox"},
		expected: "[MASKED][MASKED]",
	},
}

// TestMasking runs every maskingTests case as a subtest named after its
// input. Each case registers its phrases with the Mask handler, writes the
// input one '|'-separated piece per Write call, closes the stream, and then
// compares everything that reached the underlying buffer with the expected
// string.
func TestMasking(t *testing.T) {
	for _, tc := range maskingTests {
		t.Run(tc.input, func(t *testing.T) {
			var out bytes.Buffer
			stream := New(&out)

			for _, phrase := range tc.phrases {
				stream.Add(phrase, Mask, nil)
			}

			// The separator is dropped by the split; only the pieces are written.
			for _, piece := range bytes.Split([]byte(tc.input), []byte{'|'}) {
				written, err := stream.Write(piece)
				if err != nil {
					t.Fatal(err)
				}

				// Write must report the full piece as consumed.
				if written != len(piece) {
					t.Errorf("expected write length %d, got %d", len(piece), written)
				}
			}

			if err := stream.Close(); err != nil {
				t.Fatal(err)
			}

			if got := out.String(); got != tc.expected {
				t.Errorf("expected %v, got %v", tc.expected, got)
			}
		})
	}
}

// TestPhraseCommand registers a single phrase together with a custom handler
// and a stop function, then checks two things per case: the buffers the
// handler was invoked with (matches, in call order) and the bytes that reached
// the underlying writer (expected). As in the masking table, '|' in input
// marks the boundary between separate Write calls and is not written itself.
func TestPhraseCommand(t *testing.T) {
	tests := []struct {
		input    string
		phrase   string
		stop     func(byte) bool
		expected string
		matches  []string
	}{
		{
			input:    "::add-mask::secret\n",
			phrase:   "::add-mask::",
			stop:     StopCRLF,
			expected: "[MASKED]\n",
			matches:  []string{"secret"},
		},
		{
			input:    ":|:add|-mask|:|:H|ELL|O\n::add-mask::hello",
			phrase:   "::add-mask::",
			stop:     StopCRLF,
			expected: "[MASKED]\n[MASKED]",
			matches:  []string{"HELLO", "hello"},
		},
	}

	for _, tc := range tests {
		t.Run(tc.input, func(t *testing.T) {
			buf := new(bytes.Buffer)

			m := New(buf)

			var matches []string
			m.Add(tc.phrase, func(w io.Writer, _ string, captured []byte) error {
				// string() copies, so the record stays valid even if the
				// stream reuses the slice after the handler returns.
				matches = append(matches, string(captured))

				// Report a failed write to the stream instead of dropping it,
				// so a broken writer fails the test rather than silently
				// producing short output.
				_, err := w.Write([]byte("[MASKED]"))
				return err
			}, tc.stop)

			parts := bytes.Split([]byte(tc.input), []byte{'|'})
			for _, part := range parts {
				n, err := m.Write(part)
				if err != nil {
					t.Fatal(err)
				}
				if len(part) != n {
					t.Errorf("expected write length %d, got %d", len(part), n)
				}
			}

			if err := m.Close(); err != nil {
				t.Fatal(err)
			}

			if !slices.Equal(tc.matches, matches) {
				t.Errorf("expected matches %v, got %v", tc.matches, matches)
			}
			if tc.expected != buf.String() {
				t.Errorf("expected %v, got %v", tc.expected, buf.String())
			}
		})
	}
}

// TestDynamicAdding registers a phrase after data has already been written:
// two long phrases are added up front, "0xx" is written, the phrase "x" is
// added, and "1" is written. After Close the output must be "0x[MASKED]1".
func TestDynamicAdding(t *testing.T) {
	var out bytes.Buffer
	stream := New(&out)

	stream.Add("xx200000000000000000000000000000", Mask, nil)
	stream.Add("xx000", Mask, nil)

	_, err := stream.Write([]byte("0xx"))
	if err != nil {
		t.Fatal(err)
	}

	// Registered only now, with "0xx" already handed to the stream.
	stream.Add("x", Mask, nil)

	_, err = stream.Write([]byte("1"))
	if err != nil {
		t.Fatal(err)
	}

	err = stream.Close()
	if err != nil {
		t.Fatal(err)
	}

	const want = "0x[MASKED]1"
	if got := out.String(); got != want {
		t.Errorf("expected %v, got %v", want, got)
	}
}

// convertMaskingTestsToFuzzInput encodes every maskingTests case in the byte
// layout that parseFuzzInput decodes:
//
//	[phrase count] ([phrase length] [phrase bytes])* ([chunk length] [chunk bytes])*
//
// Chunks are the '|'-separated pieces of the case input. Every count and
// length is stored in a single byte, so a value above 255 would wrap; the
// table entries are all well below that.
func convertMaskingTestsToFuzzInput() [][]byte {
	seeds := make([][]byte, 0, len(maskingTests))

	for _, tc := range maskingTests {
		seed := []byte{byte(len(tc.phrases))}
		for _, phrase := range tc.phrases {
			seed = append(seed, byte(len(phrase)))
			seed = append(seed, phrase...)
		}

		// Deliberately no chunk-count byte here: parseFuzzInput reads chunks
		// until the data is exhausted, so a count would be decoded as the
		// length prefix of the first chunk and shift all chunk framing.
		for _, chunk := range strings.Split(tc.input, "|") {
			seed = append(seed, byte(len(chunk)))
			seed = append(seed, chunk...)
		}

		seeds = append(seeds, seed)
	}

	return seeds
}

// parseFuzzInput decodes raw fuzz bytes into phrases and input chunks.
//
// The first byte is the number of phrases. Each phrase, and then each chunk,
// is a one-byte length followed by that many bytes. A length that exceeds the
// remaining data is clamped to what is left, and phrase decoding stops early
// if the data runs out, so any byte slice decodes without error. Everything
// after the phrases is treated as chunks until the data is exhausted.
//
// Empty data yields (nil, nil). Returned chunks are sub-slices of data, not
// copies.
func parseFuzzInput(data []byte) (phrases []string, chunks [][]byte) {
	if len(data) == 0 {
		return nil, nil
	}

	// next pops one length-prefixed field off the front of data. Callers
	// must ensure data is non-empty.
	next := func() []byte {
		size := min(int(data[0]), len(data)-1)
		field := data[1 : 1+size]
		data = data[1+size:]
		return field
	}

	count := int(data[0])
	data = data[1:]

	phrases = make([]string, 0, count)
	for len(phrases) < count && len(data) > 0 {
		phrases = append(phrases, string(next()))
	}

	for len(data) > 0 {
		chunks = append(chunks, next())
	}

	return phrases, chunks
}

// FuzzSecretMasker compares the streaming masker against a reference
// computed with strings.NewReplacer.
//
// The fuzz data is decoded by parseFuzzInput into phrases and chunks. Each
// distinct non-empty phrase is registered once with the Mask handler, the
// chunks are written one Write call each, and the stream is closed. The
// reference output replaces each phrase with "[MASKED]" in the concatenated
// chunks, with the phrases passed to the replacer sorted longest first
// (stable, so equal lengths keep registration order).
func FuzzSecretMasker(f *testing.F) {
	// Seed the corpus from the table-driven cases.
	for _, seed := range convertMaskingTestsToFuzzInput() {
		f.Add(seed)
	}

	f.Fuzz(func(t *testing.T, data []byte) {
		secrets, chunks := parseFuzzInput(data)
		input := bytes.Join(chunks, nil)

		var out bytes.Buffer
		masker := New(&out)

		// Register each phrase at most once and skip empty ones.
		seen := make(map[string]struct{})
		var phrases []string
		for _, secret := range secrets {
			if secret == "" {
				continue
			}
			if _, dup := seen[secret]; dup {
				continue
			}
			seen[secret] = struct{}{}
			phrases = append(phrases, secret)
			masker.Add(secret, Mask, nil)
		}

		for _, chunk := range chunks {
			if _, err := masker.Write(chunk); err != nil {
				t.Fatal(err)
			}
		}

		if err := masker.Close(); err != nil {
			t.Fatal(err)
		}

		actual := out.String()

		// Build the reference replacer with the longest phrases first.
		sort.SliceStable(phrases, func(a, b int) bool {
			return len(phrases[a]) > len(phrases[b])
		})

		var pairs []string
		for _, phrase := range phrases {
			pairs = append(pairs, phrase, "[MASKED]")
		}

		expected := strings.NewReplacer(pairs...).Replace(string(input))
		if actual == expected {
			return
		}

		t.Errorf(`
		MASKING FAILURE
		=====================
		phrases (%d): %q
		input (chunked): %q
		input: %q
		---
		expected: %q
		actual:   %q
		=====================`,
			len(phrases),
			phrases,
			chunks,
			input,
			expected,
			actual,
		)
	})
}
